Improve DocValuesConsumerUtil#compatibleWithOptimizedMerge(...) (#126894)

The compatibleWithOptimizedMerge() method doesn't the case when doc value fields don't exist in all segments. It is like that for smaller segments not all fields exist. The change addresses that by skipping such doc value producers instead of returning that a field can't be merged using the optimized method.
This commit is contained in:
Martijn van Groningen 2025-04-17 09:24:04 +02:00 committed by GitHub
parent 814c8c5a6a
commit 4b05fed0d9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 86 additions and 0 deletions

View file

@ -47,6 +47,10 @@ class DocValuesConsumerUtil {
if (docValuesProducer instanceof XPerFieldDocValuesFormat.FieldsReader perFieldReader) {
var wrapped = perFieldReader.getDocValuesProducer(fieldInfo);
if (wrapped == null) {
continue;
}
if (wrapped instanceof ES819TSDBDocValuesProducer tsdbDocValuesProducer) {
switch (fieldInfo.getDocValuesType()) {
case NUMERIC -> {

View file

@ -159,6 +159,88 @@ public class ES819TSDBDocValuesFormatTests extends ES87TSDBDocValuesFormatTests
}
}
public void testTwoSegmentsTwoDifferentFields() throws Exception {
String timestampField = "@timestamp";
String hostnameField = "host.name";
long timestamp = 1704067200000L;
var config = getTimeSeriesIndexWriterConfig(hostnameField, timestampField);
try (var dir = newDirectory(); var iw = new IndexWriter(dir, config)) {
long counter1 = 0;
long counter2 = 10_000_000;
{
var d = new Document();
d.add(new SortedDocValuesField(hostnameField, new BytesRef("host-001")));
d.add(new SortedNumericDocValuesField(timestampField, timestamp - 1));
d.add(new NumericDocValuesField("counter_1", counter1));
d.add(new SortedNumericDocValuesField("gauge_1", 2));
iw.addDocument(d);
iw.commit();
}
{
var d = new Document();
d.add(new SortedDocValuesField(hostnameField, new BytesRef("host-001")));
d.add(new SortedNumericDocValuesField(timestampField, timestamp));
d.add(new SortedNumericDocValuesField("counter_2", counter2));
d.add(new SortedNumericDocValuesField("gauge_2", -2));
iw.addDocument(d);
iw.commit();
}
iw.forceMerge(1);
try (var reader = DirectoryReader.open(iw)) {
assertEquals(1, reader.leaves().size());
assertEquals(2, reader.maxDoc());
var leaf = reader.leaves().get(0).reader();
var hostNameDV = leaf.getSortedDocValues(hostnameField);
assertNotNull(hostNameDV);
var timestampDV = DocValues.unwrapSingleton(leaf.getSortedNumericDocValues(timestampField));
assertNotNull(timestampDV);
var counterOneDV = leaf.getNumericDocValues("counter_1");
assertNotNull(counterOneDV);
var counterTwoDV = leaf.getSortedNumericDocValues("counter_2");
assertNotNull(counterTwoDV);
var gaugeOneDV = leaf.getSortedNumericDocValues("gauge_1");
assertNotNull(gaugeOneDV);
var gaugeTwoDV = leaf.getSortedNumericDocValues("gauge_2");
assertNotNull(gaugeTwoDV);
for (int i = 0; i < 2; i++) {
assertEquals(i, hostNameDV.nextDoc());
assertEquals("host-001", hostNameDV.lookupOrd(hostNameDV.ordValue()).utf8ToString());
assertEquals(i, timestampDV.nextDoc());
long actualTimestamp = timestampDV.longValue();
assertTrue(actualTimestamp == timestamp || actualTimestamp == timestamp - 1);
if (counterOneDV.advanceExact(i)) {
long counterOneValue = counterOneDV.longValue();
assertEquals(counter1, counterOneValue);
}
if (counterTwoDV.advanceExact(i)) {
assertEquals(1, counterTwoDV.docValueCount());
long counterTwoValue = counterTwoDV.nextValue();
assertEquals(counter2, counterTwoValue);
}
if (gaugeOneDV.advanceExact(i)) {
assertEquals(1, gaugeOneDV.docValueCount());
long gaugeOneValue = gaugeOneDV.nextValue();
assertEquals(2, gaugeOneValue);
}
if (gaugeTwoDV.advanceExact(i)) {
assertEquals(1, gaugeTwoDV.docValueCount());
long gaugeTwoValue = gaugeTwoDV.nextValue();
assertEquals(-2, gaugeTwoValue);
}
}
}
}
}
public void testForceMergeSparseCase() throws Exception {
String timestampField = "@timestamp";
String hostnameField = "host.name";