TSDB doc values: build the jump table inline (#126499)

Build the jump table (DISI) while iterating over the SortedNumericDocValues to encode the values, instead of iterating over the SortedNumericDocValues a second time just to build the jump table.

When index sorting is active, this requires an additional merge sort. Follow-up to #125403.
Martijn van Groningen 2025-04-17 12:08:16 +02:00 committed by GitHub
parent 4248c9908c
commit 0d41e9a2a5
4 changed files with 955 additions and 143 deletions
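
In sketch form, using the names from the diff below (encodeBlock is a hypothetical stand-in for the existing TSDBDocValuesEncoder logic), the single-pass shape looks like:

    // One pass over the doc values: the DISI jump table is accumulated per docID
    // while the values are encoded, instead of a second IndexedDISI.writeBitSet pass.
    DISIAccumulator disi = new DISIAccumulator(dir, context, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER);
    for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
        disi.addDocId(doc);       // inline jump-table bookkeeping
        encodeBlock(doc, values); // stand-in: buffer values and flush NUMERIC_BLOCK_SIZE blocks
    }
    short jumpTableEntryCount = disi.build(data); // append DISI blocks + jump table to the data file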

File: TSDBDocValuesMergeBenchmark.java

@@ -39,7 +39,6 @@ import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.TearDown;
import org.openjdk.jmh.annotations.Threads;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.profile.AsyncProfiler;
@@ -51,9 +50,8 @@ import org.openjdk.jmh.runner.options.OptionsBuilder;
import java.io.IOException;
import java.nio.file.Files;
import java.util.Random;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.function.Supplier;
@BenchmarkMode(Mode.SingleShotTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@@ -71,23 +69,10 @@ public class TSDBDocValuesMergeBenchmark {
LogConfigurator.setNodeName("test");
}
@Param("20431204")
private int nDocs;
@Param("1000")
private int deltaTime;
@Param("42")
private int seed;
private static final String TIMESTAMP_FIELD = "@timestamp";
private static final String HOSTNAME_FIELD = "host.name";
private static final long BASE_TIMESTAMP = 1704067200000L;
private IndexWriter indexWriterWithoutOptimizedMerge;
private IndexWriter indexWriterWithOptimizedMerge;
private ExecutorService executorService;
public static void main(String[] args) throws RunnerException {
final Options options = new OptionsBuilder().include(TSDBDocValuesMergeBenchmark.class.getSimpleName())
.addProfiler(AsyncProfiler.class)
@@ -96,78 +81,168 @@ public class TSDBDocValuesMergeBenchmark {
new Runner(options).run();
}
@Setup(Level.Trial)
public void setup() throws IOException {
executorService = Executors.newSingleThreadExecutor();
@State(Scope.Benchmark)
public static class StateDenseWithoutOptimizeMerge {
final Directory tempDirectoryWithoutDocValuesSkipper = FSDirectory.open(Files.createTempDirectory("temp1-"));
final Directory tempDirectoryWithDocValuesSkipper = FSDirectory.open(Files.createTempDirectory("temp2-"));
@Param("20431204")
private int nDocs;
@Param("1000")
private int deltaTime;
@Param("42")
private int seed;
private Directory directory;
private final Supplier<IndexWriterConfig> iwc = () -> createIndexWriterConfig(false);
@Setup(Level.Trial)
public void setup() throws IOException {
directory = FSDirectory.open(Files.createTempDirectory("temp2-"));
createIndex(directory, iwc.get(), false, nDocs, deltaTime, seed);
}
indexWriterWithoutOptimizedMerge = createIndex(tempDirectoryWithoutDocValuesSkipper, false);
indexWriterWithOptimizedMerge = createIndex(tempDirectoryWithDocValuesSkipper, true);
}
private IndexWriter createIndex(final Directory directory, final boolean optimizedMergeEnabled) throws IOException {
final var iwc = createIndexWriterConfig(optimizedMergeEnabled);
@Benchmark
public void forceMergeDenseWithoutOptimizedMerge(StateDenseWithoutOptimizeMerge state) throws IOException {
forceMerge(state.directory, state.iwc.get());
}
@State(Scope.Benchmark)
public static class StateDenseWithOptimizeMerge {
@Param("20431204")
private int nDocs;
@Param("1000")
private int deltaTime;
@Param("42")
private int seed;
private Directory directory;
private final Supplier<IndexWriterConfig> iwc = () -> createIndexWriterConfig(true);
@Setup(Level.Trial)
public void setup() throws IOException {
directory = FSDirectory.open(Files.createTempDirectory("temp1-"));
createIndex(directory, iwc.get(), false, nDocs, deltaTime, seed);
}
}
@Benchmark
public void forceMergeDenseWithOptimizedMerge(StateDenseWithOptimizeMerge state) throws IOException {
forceMerge(state.directory, state.iwc.get());
}
@State(Scope.Benchmark)
public static class StateSparseWithoutOptimizeMerge {
@Param("20431204")
private int nDocs;
@Param("1000")
private int deltaTime;
@Param("42")
private int seed;
private Directory directory;
private final Supplier<IndexWriterConfig> iwc = () -> createIndexWriterConfig(false);
@Setup(Level.Trial)
public void setup() throws IOException {
directory = FSDirectory.open(Files.createTempDirectory("temp4-"));
createIndex(directory, iwc.get(), true, nDocs, deltaTime, seed);
}
}
@Benchmark
public void forceMergeSparseWithoutOptimizedMerge(StateSparseWithoutOptimizeMerge state) throws IOException {
forceMerge(state.directory, state.iwc.get());
}
@State(Scope.Benchmark)
public static class StateSparseWithOptimizeMerge {
@Param("20431204")
private int nDocs;
@Param("1000")
private int deltaTime;
@Param("42")
private int seed;
private Directory directory;
private final Supplier<IndexWriterConfig> iwc = () -> createIndexWriterConfig(true);
@Setup(Level.Trial)
public void setup() throws IOException {
directory = FSDirectory.open(Files.createTempDirectory("temp3-"));
createIndex(directory, iwc.get(), true, nDocs, deltaTime, seed);
}
}
@Benchmark
public void forceMergeSparseWithOptimizedMerge(StateSparseWithOptimizeMerge state) throws IOException {
forceMerge(state.directory, state.iwc.get());
}
private void forceMerge(Directory directory, IndexWriterConfig config) throws IOException {
try (var indexWriter = new IndexWriter(directory, config)) {
indexWriter.forceMerge(1);
}
}
static void createIndex(Directory directory, IndexWriterConfig iwc, boolean sparse, int nDocs, int deltaTime, int seed)
throws IOException {
long counter1 = 0;
long counter2 = 10_000_000;
long[] gauge1Values = new long[] { 2, 4, 6, 8, 10, 12, 14, 16 };
long[] gauge2Values = new long[] { -2, -4, -6, -8, -10, -12, -14, -16 };
int numHosts = 1000;
int numHosts = 10000;
String[] tags = new String[] { "tag_1", "tag_2", "tag_3", "tag_4", "tag_5", "tag_6", "tag_7", "tag_8" };
final Random random = new Random(seed);
IndexWriter indexWriter = new IndexWriter(directory, iwc);
for (int i = 0; i < nDocs; i++) {
final Document doc = new Document();
try (var indexWriter = new IndexWriter(directory, iwc)) {
for (int i = 0; i < nDocs; i++) {
final Document doc = new Document();
final int batchIndex = i / numHosts;
final String hostName = "host-" + batchIndex;
// Slightly vary the timestamp in each document
final long timestamp = BASE_TIMESTAMP + ((i % numHosts) * deltaTime) + random.nextInt(0, deltaTime);
final int batchIndex = i % numHosts;
final String hostName = "host-" + batchIndex;
// Slightly vary the timestamp in each document
final long timestamp = BASE_TIMESTAMP + ((i % numHosts) * deltaTime) + random.nextInt(0, deltaTime);
doc.add(new SortedDocValuesField(HOSTNAME_FIELD, new BytesRef(hostName)));
doc.add(new SortedNumericDocValuesField(TIMESTAMP_FIELD, timestamp));
doc.add(new SortedNumericDocValuesField("counter_1", counter1++));
doc.add(new SortedNumericDocValuesField("counter_2", counter2++));
doc.add(new SortedNumericDocValuesField("gauge_1", gauge1Values[i % gauge1Values.length]));
doc.add(new SortedNumericDocValuesField("gauge_2", gauge2Values[i % gauge1Values.length]));
int numTags = tags.length % (i + 1);
for (int j = 0; j < numTags; j++) {
doc.add(new SortedSetDocValuesField("tags", new BytesRef(tags[j])));
}
indexWriter.addDocument(doc);
}
indexWriter.commit();
return indexWriter;
}
@Benchmark
public void forceMergeWithoutOptimizedMerge() throws IOException {
forceMerge(indexWriterWithoutOptimizedMerge);
}
@Benchmark
public void forceMergeWithOptimizedMerge() throws IOException {
forceMerge(indexWriterWithOptimizedMerge);
}
private void forceMerge(final IndexWriter indexWriter) throws IOException {
indexWriter.forceMerge(1);
}
@TearDown(Level.Trial)
public void tearDown() {
if (executorService != null) {
executorService.shutdown();
try {
if (executorService.awaitTermination(30, TimeUnit.SECONDS) == false) {
executorService.shutdownNow();
doc.add(new SortedDocValuesField(HOSTNAME_FIELD, new BytesRef(hostName)));
doc.add(new SortedNumericDocValuesField(TIMESTAMP_FIELD, timestamp));
if (sparse == false || random.nextBoolean()) {
doc.add(new SortedNumericDocValuesField("counter_1", counter1++));
}
if (sparse == false || random.nextBoolean()) {
doc.add(new SortedNumericDocValuesField("counter_2", counter2++));
}
if (sparse == false || random.nextBoolean()) {
doc.add(new SortedNumericDocValuesField("gauge_1", gauge1Values[i % gauge1Values.length]));
}
if (sparse == false || random.nextBoolean()) {
doc.add(new SortedNumericDocValuesField("gauge_2", gauge2Values[i % gauge1Values.length]));
}
if (sparse == false || random.nextBoolean()) {
int numTags = tags.length % (i + 1);
for (int j = 0; j < numTags; j++) {
doc.add(new SortedSetDocValuesField("tags", new BytesRef(tags[j])));
}
}
indexWriter.addDocument(doc);
if (i % 10000 == 0) {
indexWriter.commit();
}
} catch (InterruptedException e) {
executorService.shutdownNow();
Thread.currentThread().interrupt();
}
}
}
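
The restructuring above follows the usual JMH shape: one @State class per scenario builds its index in @Setup(Level.Trial), so each @Benchmark method times only the force-merge. A condensed sketch (the @Param fields and the sparse/optimized variants are elided):

    @State(Scope.Benchmark)
    public static class MergeState {
        Directory directory;

        @Setup(Level.Trial)
        public void setup() throws IOException {
            directory = FSDirectory.open(Files.createTempDirectory("bench-"));
            createIndex(directory, createIndexWriterConfig(true), false, nDocs, deltaTime, seed);
        }
    }

    @Benchmark
    public void forceMerge(MergeState state) throws IOException {
        try (var writer = new IndexWriter(state.directory, createIndexWriterConfig(true))) {
            writer.forceMerge(1); // the only work being measured
        }
    }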

File: org/elasticsearch/index/codec/tsdb/es819/DISIAccumulator.java (new)

@@ -0,0 +1,196 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
package org.elasticsearch.index.codec.tsdb.es819;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
import org.elasticsearch.core.SuppressForbidden;
import java.io.Closeable;
import java.io.IOException;
/**
* Fork of {@link org.apache.lucene.codecs.lucene90.IndexedDISI#writeBitSet(DocIdSetIterator, IndexOutput)} that allows
* building the jump list iteratively, one docID at a time, instead of relying on a DocIdSetIterator.
*/
final class DISIAccumulator implements Closeable {
private static final int BLOCK_SIZE = 65536; // The number of docIDs that a single block represents
private static final int DENSE_BLOCK_LONGS = BLOCK_SIZE / Long.SIZE; // 1024
public static final byte DEFAULT_DENSE_RANK_POWER = 9; // Every 512 docIDs / 8 longs
static final int MAX_ARRAY_LENGTH = (1 << 12) - 1;
final Directory dir;
final IOContext context;
final String skipListTempFileName;
final IndexOutput disiTempOutput;
final byte denseRankPower;
final long origo;
int totalCardinality = 0;
int blockCardinality = 0;
final FixedBitSet buffer = new FixedBitSet(1 << 16);
int[] jumps = new int[ArrayUtil.oversize(1, Integer.BYTES * 2)];
int prevBlock = -1;
int jumpBlockIndex = 0;
DISIAccumulator(Directory dir, IOContext context, IndexOutput data, byte denseRankPower) throws IOException {
this.dir = dir;
this.context = context;
this.denseRankPower = denseRankPower;
if ((denseRankPower < 7 || denseRankPower > 15) && denseRankPower != -1) {
throw new IllegalArgumentException(
"Acceptable values for denseRankPower are 7-15 (every 128-32768 docIDs). "
+ "The provided power was "
+ denseRankPower
+ " (every "
+ (int) Math.pow(2, denseRankPower)
+ " docIDs)"
);
}
this.disiTempOutput = dir.createTempOutput(data.getName(), "disi", context);
this.skipListTempFileName = disiTempOutput.getName();
this.origo = disiTempOutput.getFilePointer(); // All jumps are relative to the origo
}
void addDocId(int doc) throws IOException {
final int block = doc >>> 16;
if (prevBlock != -1 && block != prevBlock) {
// Track offset+index from previous block up to current
jumps = addJumps(jumps, disiTempOutput.getFilePointer() - origo, totalCardinality, jumpBlockIndex, prevBlock + 1);
jumpBlockIndex = prevBlock + 1;
// Flush block
flush(prevBlock, buffer, blockCardinality, denseRankPower, disiTempOutput);
// Reset for next block
buffer.clear();
totalCardinality += blockCardinality;
blockCardinality = 0;
}
buffer.set(doc & 0xFFFF);
blockCardinality++;
prevBlock = block;
}
short build(IndexOutput data) throws IOException {
if (blockCardinality > 0) {
jumps = addJumps(jumps, disiTempOutput.getFilePointer() - origo, totalCardinality, jumpBlockIndex, prevBlock + 1);
totalCardinality += blockCardinality;
flush(prevBlock, buffer, blockCardinality, denseRankPower, disiTempOutput);
buffer.clear();
prevBlock++;
}
final int lastBlock = prevBlock == -1 ? 0 : prevBlock; // There will always be at least 1 block (NO_MORE_DOCS)
// Last entry is a SPARSE with blockIndex == 32767 and the single entry 65535, which becomes
// the docID NO_MORE_DOCS. To avoid creating 65K jump-table entries, only a single entry is
// created, pointing to the offset of the NO_MORE_DOCS block, with the jumpBlockIndex set to
// the logical EMPTY block after all real blocks.
jumps = addJumps(jumps, disiTempOutput.getFilePointer() - origo, totalCardinality, lastBlock, lastBlock + 1);
buffer.set(DocIdSetIterator.NO_MORE_DOCS & 0xFFFF);
flush(DocIdSetIterator.NO_MORE_DOCS >>> 16, buffer, 1, denseRankPower, disiTempOutput);
// offset+index jump-table stored at the end
short blockCount = flushBlockJumps(jumps, lastBlock + 1, disiTempOutput);
disiTempOutput.close();
try (var addressDataInput = dir.openInput(skipListTempFileName, context)) {
data.copyBytes(addressDataInput, addressDataInput.length());
}
return blockCount;
}
// Adds entries to the offset & index jump-table for blocks
private static int[] addJumps(int[] jumps, long offset, int index, int startBlock, int endBlock) {
assert offset < Integer.MAX_VALUE : "Logically the offset should not exceed 2^30 but was >= Integer.MAX_VALUE";
jumps = ArrayUtil.grow(jumps, (endBlock + 1) * 2);
for (int b = startBlock; b < endBlock; b++) {
jumps[b * 2] = index;
jumps[b * 2 + 1] = (int) offset;
}
return jumps;
}
private static void flush(int block, FixedBitSet buffer, int cardinality, byte denseRankPower, IndexOutput out) throws IOException {
assert block >= 0 && block < BLOCK_SIZE;
out.writeShort((short) block);
assert cardinality > 0 && cardinality <= BLOCK_SIZE;
out.writeShort((short) (cardinality - 1));
if (cardinality > MAX_ARRAY_LENGTH) {
if (cardinality != BLOCK_SIZE) { // all docs are set
if (denseRankPower != -1) {
final byte[] rank = createRank(buffer, denseRankPower);
out.writeBytes(rank, rank.length);
}
for (long word : buffer.getBits()) {
out.writeLong(word);
}
}
} else {
BitSetIterator it = new BitSetIterator(buffer, cardinality);
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
out.writeShort((short) doc);
}
}
}
// Flushes the offset & index jump-table for blocks. This should be the last data written to out.
// Returns the blockCount for the blocks reachable through the jump-table, or -1 for no jump-table.
private static short flushBlockJumps(int[] jumps, int blockCount, IndexOutput out) throws IOException {
if (blockCount == 2) { // Jumps with a single real entry + NO_MORE_DOCS is just wasted space, so we ignore that
blockCount = 0;
}
for (int i = 0; i < blockCount; i++) {
out.writeInt(jumps[i * 2]); // index
out.writeInt(jumps[i * 2 + 1]); // offset
}
// As there are at most 32k blocks, the count is a short
// The jumpTableOffset will be at lastPos - (blockCount * Long.BYTES)
return (short) blockCount;
}
// Creates a DENSE rank-entry (the number of set bits up to a given point) for the buffer.
// One rank-entry for every {@code 2^denseRankPower} bits, with each rank-entry using 2 bytes.
// Represented as a byte[] for fast flushing and mirroring of the retrieval representation.
private static byte[] createRank(FixedBitSet buffer, byte denseRankPower) {
final int longsPerRank = 1 << (denseRankPower - 6);
final int rankMark = longsPerRank - 1;
final int rankIndexShift = denseRankPower - 7; // 6 for the long (2^6) + 1 for 2 bytes/entry
final byte[] rank = new byte[DENSE_BLOCK_LONGS >> rankIndexShift];
final long[] bits = buffer.getBits();
int bitCount = 0;
for (int word = 0; word < DENSE_BLOCK_LONGS; word++) {
if ((word & rankMark) == 0) { // Every longsPerRank longs
rank[word >> rankIndexShift] = (byte) (bitCount >> 8);
rank[(word >> rankIndexShift) + 1] = (byte) (bitCount & 0xFF);
}
bitCount += Long.bitCount(bits[word]);
}
return rank;
}
@Override
@SuppressForbidden(reason = "require usage of Lucene's IOUtils#deleteFilesIgnoringExceptions(...)")
public void close() throws IOException {
IOUtils.close(disiTempOutput);
if (skipListTempFileName != null) {
IOUtils.deleteFilesIgnoringExceptions(dir, skipListTempFileName);
}
}
}
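
Usage mirrors writeJumpTable(...) in DISIAccumulatorTests at the end of this commit (it below is any DocIdSetIterator over the docs with values): docIDs must be added in increasing order, and build(...) copies the accumulated blocks plus the trailing jump table into the main output.

    try (var acc = new DISIAccumulator(dir, IOContext.DEFAULT, out, DISIAccumulator.DEFAULT_DENSE_RANK_POWER)) {
        for (int docId = it.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = it.nextDoc()) {
            acc.addDocId(docId); // strictly increasing docIDs
        }
        short jumpTableEntryCount = acc.build(out); // returned count is later written to meta
    }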

File: org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java

@@ -29,6 +29,8 @@ import org.apache.lucene.search.SortedSetSelector;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.ByteBuffersDataOutput;
import org.apache.lucene.store.ByteBuffersIndexOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
@@ -54,6 +56,8 @@ import static org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat.
final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer {
final Directory dir;
final IOContext context;
IndexOutput data, meta;
final int maxDoc;
private byte[] termsDictBuffer;
@@ -70,6 +74,8 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer {
String metaExtension
) throws IOException {
this.termsDictBuffer = new byte[1 << 14];
this.dir = state.directory;
this.context = state.context;
boolean success = false;
try {
final String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
@@ -138,84 +144,101 @@
meta.writeLong(numValues);
meta.writeInt(numDocsWithValue);
if (numValues > 0) {
// Special case for maxOrd of 1, signal -1 that no blocks will be written
meta.writeInt(maxOrd != 1 ? ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT : -1);
final ByteBuffersDataOutput indexOut = new ByteBuffersDataOutput();
final DirectMonotonicWriter indexWriter = DirectMonotonicWriter.getInstance(
meta,
new ByteBuffersIndexOutput(indexOut, "temp-dv-index", "temp-dv-index"),
1L + ((numValues - 1) >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT),
ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT
);
DISIAccumulator disiAccumulator = null;
try {
if (numValues > 0) {
assert numDocsWithValue > 0;
// Special case for maxOrd of 1, signal -1 that no blocks will be written
meta.writeInt(maxOrd != 1 ? ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT : -1);
final ByteBuffersDataOutput indexOut = new ByteBuffersDataOutput();
final DirectMonotonicWriter indexWriter = DirectMonotonicWriter.getInstance(
meta,
new ByteBuffersIndexOutput(indexOut, "temp-dv-index", "temp-dv-index"),
1L + ((numValues - 1) >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT),
ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT
);
final long valuesDataOffset = data.getFilePointer();
// Special case for maxOrd of 1, skip writing the blocks
if (maxOrd != 1) {
final long[] buffer = new long[ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE];
int bufferSize = 0;
final TSDBDocValuesEncoder encoder = new TSDBDocValuesEncoder(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE);
values = valuesProducer.getSortedNumeric(field);
final int bitsPerOrd = maxOrd >= 0 ? PackedInts.bitsRequired(maxOrd - 1) : -1;
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
final int count = values.docValueCount();
for (int i = 0; i < count; ++i) {
buffer[bufferSize++] = values.nextValue();
if (bufferSize == ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE) {
indexWriter.add(data.getFilePointer() - valuesDataOffset);
if (maxOrd >= 0) {
encoder.encodeOrdinals(buffer, data, bitsPerOrd);
} else {
encoder.encode(buffer, data);
final long valuesDataOffset = data.getFilePointer();
// Special case for maxOrd of 1, skip writing the blocks
if (maxOrd != 1) {
final long[] buffer = new long[ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE];
int bufferSize = 0;
final TSDBDocValuesEncoder encoder = new TSDBDocValuesEncoder(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE);
values = valuesProducer.getSortedNumeric(field);
final int bitsPerOrd = maxOrd >= 0 ? PackedInts.bitsRequired(maxOrd - 1) : -1;
if (enableOptimizedMerge && numDocsWithValue < maxDoc) {
disiAccumulator = new DISIAccumulator(dir, context, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER);
}
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
if (disiAccumulator != null) {
disiAccumulator.addDocId(doc);
}
final int count = values.docValueCount();
for (int i = 0; i < count; ++i) {
buffer[bufferSize++] = values.nextValue();
if (bufferSize == ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE) {
indexWriter.add(data.getFilePointer() - valuesDataOffset);
if (maxOrd >= 0) {
encoder.encodeOrdinals(buffer, data, bitsPerOrd);
} else {
encoder.encode(buffer, data);
}
bufferSize = 0;
}
bufferSize = 0;
}
}
if (bufferSize > 0) {
indexWriter.add(data.getFilePointer() - valuesDataOffset);
// Fill unused slots in the block with zeroes rather than junk
Arrays.fill(buffer, bufferSize, ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE, 0L);
if (maxOrd >= 0) {
encoder.encodeOrdinals(buffer, data, bitsPerOrd);
} else {
encoder.encode(buffer, data);
}
}
}
if (bufferSize > 0) {
indexWriter.add(data.getFilePointer() - valuesDataOffset);
// Fill unused slots in the block with zeroes rather than junk
Arrays.fill(buffer, bufferSize, ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE, 0L);
if (maxOrd >= 0) {
encoder.encodeOrdinals(buffer, data, bitsPerOrd);
} else {
encoder.encode(buffer, data);
}
final long valuesDataLength = data.getFilePointer() - valuesDataOffset;
if (maxOrd != 1) {
// Special case for maxOrd of 1, indexWriter isn't really used, so no need to invoke finish() method.
indexWriter.finish();
}
final long indexDataOffset = data.getFilePointer();
data.copyBytes(indexOut.toDataInput(), indexOut.size());
meta.writeLong(indexDataOffset);
meta.writeLong(data.getFilePointer() - indexDataOffset);
meta.writeLong(valuesDataOffset);
meta.writeLong(valuesDataLength);
}
final long valuesDataLength = data.getFilePointer() - valuesDataOffset;
if (maxOrd != 1) {
// Special case for maxOrd of 1, indexWriter isn't really used, so no need to invoke finish() method.
indexWriter.finish();
if (numDocsWithValue == 0) { // meta[-2, 0]: No documents with values
meta.writeLong(-2); // docsWithFieldOffset
meta.writeLong(0L); // docsWithFieldLength
meta.writeShort((short) -1); // jumpTableEntryCount
meta.writeByte((byte) -1); // denseRankPower
} else if (numDocsWithValue == maxDoc) { // meta[-1, 0]: All documents have values
meta.writeLong(-1); // docsWithFieldOffset
meta.writeLong(0L); // docsWithFieldLength
meta.writeShort((short) -1); // jumpTableEntryCount
meta.writeByte((byte) -1); // denseRankPower
} else { // meta[data.offset, data.length]: IndexedDISI structure for documents with values
long offset = data.getFilePointer();
meta.writeLong(offset); // docsWithFieldOffset
final short jumpTableEntryCount;
if (maxOrd != 1 && disiAccumulator != null) {
jumpTableEntryCount = disiAccumulator.build(data);
} else {
values = valuesProducer.getSortedNumeric(field);
jumpTableEntryCount = IndexedDISI.writeBitSet(values, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER);
}
meta.writeLong(data.getFilePointer() - offset); // docsWithFieldLength
meta.writeShort(jumpTableEntryCount);
meta.writeByte(IndexedDISI.DEFAULT_DENSE_RANK_POWER);
}
final long indexDataOffset = data.getFilePointer();
data.copyBytes(indexOut.toDataInput(), indexOut.size());
meta.writeLong(indexDataOffset);
meta.writeLong(data.getFilePointer() - indexDataOffset);
meta.writeLong(valuesDataOffset);
meta.writeLong(valuesDataLength);
}
if (numDocsWithValue == 0) { // meta[-2, 0]: No documents with values
meta.writeLong(-2); // docsWithFieldOffset
meta.writeLong(0L); // docsWithFieldLength
meta.writeShort((short) -1); // jumpTableEntryCount
meta.writeByte((byte) -1); // denseRankPower
} else if (numDocsWithValue == maxDoc) { // meta[-1, 0]: All documents have values
meta.writeLong(-1); // docsWithFieldOffset
meta.writeLong(0L); // docsWithFieldLength
meta.writeShort((short) -1); // jumpTableEntryCount
meta.writeByte((byte) -1); // denseRankPower
} else { // meta[data.offset, data.length]: IndexedDISI structure for documents with values
long offset = data.getFilePointer();
meta.writeLong(offset); // docsWithFieldOffset
values = valuesProducer.getSortedNumeric(field);
final short jumpTableEntryCount = IndexedDISI.writeBitSet(values, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER);
meta.writeLong(data.getFilePointer() - offset); // docsWithFieldLength
meta.writeShort(jumpTableEntryCount);
meta.writeByte(IndexedDISI.DEFAULT_DENSE_RANK_POWER);
} finally {
IOUtils.close(disiAccumulator);
}
return new long[] { numDocsWithValue, numValues };
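
As a reading aid (no behavior beyond what the diff above shows): the accumulator is created only when optimized merge is enabled and the field is sparse; dense fields and the non-optimized path keep the original second writeBitSet pass.

    DISIAccumulator disiAccumulator = null;
    if (enableOptimizedMerge && numDocsWithValue < maxDoc) {
        disiAccumulator = new DISIAccumulator(dir, context, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER);
    }
    // ... single pass: disiAccumulator.addDocId(doc) while encoding value blocks ...
    final short jumpTableEntryCount = (maxOrd != 1 && disiAccumulator != null)
        ? disiAccumulator.build(data) // jump table accumulated during the encoding pass
        : IndexedDISI.writeBitSet(valuesProducer.getSortedNumeric(field), data, IndexedDISI.DEFAULT_DENSE_RANK_POWER);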

File: org/elasticsearch/index/codec/tsdb/es819/DISIAccumulatorTests.java (new)

@@ -0,0 +1,518 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
package org.elasticsearch.index.codec.tsdb.es819;
import org.apache.lucene.codecs.lucene90.IndexedDISI;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.SparseFixedBitSet;
import org.elasticsearch.core.SuppressForbidden;
import java.io.IOException;
import java.util.Random;
// Copied from org.apache.lucene.codecs.lucene90.TestIndexedDISI, keeping the tests that we can run.
// The test suite has been modified to write the jump table using writeJumpTable(...) in this class.
// (Some original tests require access to the package-protected constructor of IndexedDISI and were removed.)
public class DISIAccumulatorTests extends LuceneTestCase {
public void testEmpty() throws IOException {
int maxDoc = TestUtil.nextInt(random(), 1, 100000);
BitSet set = new SparseFixedBitSet(maxDoc);
try (Directory dir = newDirectory()) {
doTest(set, dir);
}
}
// EMPTY blocks are special with regard to jumps as they have size 0
public void testEmptyBlocks() throws IOException {
final int B = 65536;
int maxDoc = B * 11;
BitSet set = new SparseFixedBitSet(maxDoc);
// block 0: EMPTY
set.set(B + 5); // block 1: SPARSE
// block 2: EMPTY
// block 3: EMPTY
set.set(B * 4 + 5); // block 4: SPARSE
for (int i = 0; i < B; i++) {
set.set(B * 6 + i); // block 6: ALL
}
for (int i = 0; i < B; i += 3) {
set.set(B * 7 + i); // block 7: DENSE
}
for (int i = 0; i < B; i++) {
if (i != 32768) {
set.set(B * 8 + i); // block 8: DENSE (all-1)
}
}
// block 9-11: EMPTY
try (Directory dir = newDirectory()) {
doTestAllSingleJump(set, dir);
}
// Change the first block to DENSE to see if jump-tables sets to position 0
set.set(0);
try (Directory dir = newDirectory()) {
doTestAllSingleJump(set, dir);
}
}
// EMPTY blocks are special with regard to jumps as they have size 0
public void testLastEmptyBlocks() throws IOException {
final int B = 65536;
int maxDoc = B * 3;
BitSet set = new SparseFixedBitSet(maxDoc);
for (int docID = 0; docID < B * 2; docID++) { // first 2 blocks are ALL
set.set(docID);
}
// Last block is EMPTY
try (Directory dir = newDirectory()) {
doTestAllSingleJump(set, dir);
assertAdvanceBeyondEnd(set, dir);
}
}
// Checks that advance after the end of the blocks has been reached has the correct behaviour
private void assertAdvanceBeyondEnd(BitSet set, Directory dir) throws IOException {
final int cardinality = set.cardinality();
final byte denseRankPower = 9; // Not tested here so fixed to isolate factors
int jumpTableentryCount;
try (IndexOutput out = dir.createOutput("bar", IOContext.DEFAULT)) {
jumpTableentryCount = writeJumpTable(set, dir, out, denseRankPower);
}
try (IndexInput in = dir.openInput("bar", IOContext.DEFAULT)) {
BitSetIterator disi2 = new BitSetIterator(set, cardinality);
int doc = disi2.docID();
int index = 0;
while (doc < cardinality) {
doc = disi2.nextDoc();
index++;
}
IndexedDISI disi = new IndexedDISI(in, 0L, in.length(), jumpTableentryCount, denseRankPower, cardinality);
// Advance 1 docID beyond end
assertFalse("There should be no set bit beyond the valid docID range", disi.advanceExact(set.length()));
disi.advance(doc); // Should be the special docID signifying NO_MORE_DOCS from the BitSetIterator
// disi.index()+1 as the while-loop also counts the NO_MORE_DOCS
assertEquals("The index when advancing beyond the last defined docID should be correct", index, disi.index() + 1);
}
}
// TODO: can this be toned down?
public void testRandomBlocks() throws IOException {
final int BLOCKS = 5;
BitSet set = createSetWithRandomBlocks(BLOCKS);
try (Directory dir = newDirectory()) {
doTestAllSingleJump(set, dir);
}
}
private BitSet createSetWithRandomBlocks(int blockCount) {
final int B = 65536;
BitSet set = new SparseFixedBitSet(blockCount * B);
for (int block = 0; block < blockCount; block++) {
switch (random().nextInt(4)) {
case 0: { // EMPTY
break;
}
case 1: { // ALL
for (int docID = block * B; docID < (block + 1) * B; docID++) {
set.set(docID);
}
break;
}
case 2: { // SPARSE ( < 4096 )
for (int docID = block * B; docID < (block + 1) * B; docID += 101) {
set.set(docID);
}
break;
}
case 3: { // DENSE ( >= 4096 )
for (int docID = block * B; docID < (block + 1) * B; docID += 3) {
set.set(docID);
}
break;
}
default:
throw new IllegalStateException("Modulo logic error: there should only be 4 possibilities");
}
}
return set;
}
private void doTestAllSingleJump(BitSet set, Directory dir) throws IOException {
final int cardinality = set.cardinality();
final byte denseRankPower = rarely() ? -1 : (byte) (random().nextInt(7) + 7); // sane + chance of disable
long length;
int jumpTableentryCount;
try (IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT)) {
jumpTableentryCount = writeJumpTable(set, dir, out, denseRankPower);
length = out.getFilePointer();
}
try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) {
for (int i = 0; i < set.length(); i++) {
IndexedDISI disi = new IndexedDISI(in, 0L, length, jumpTableentryCount, denseRankPower, cardinality);
assertEquals("The bit at " + i + " should be correct with advanceExact", set.get(i), disi.advanceExact(i));
IndexedDISI disi2 = new IndexedDISI(in, 0L, length, jumpTableentryCount, denseRankPower, cardinality);
disi2.advance(i);
// Proper sanity check with jump tables as an error could make them seek backwards
assertTrue("The docID should at least be " + i + " after advance(" + i + ") but was " + disi2.docID(), i <= disi2.docID());
if (set.get(i)) {
assertEquals("The docID should be present with advance", i, disi2.docID());
} else {
assertNotSame("The docID should not be present with advance", i, disi2.docID());
}
}
}
}
public void testOneDoc() throws IOException {
int maxDoc = TestUtil.nextInt(random(), 1, 100000);
BitSet set = new SparseFixedBitSet(maxDoc);
set.set(random().nextInt(maxDoc));
try (Directory dir = newDirectory()) {
doTest(set, dir);
}
}
public void testTwoDocs() throws IOException {
int maxDoc = TestUtil.nextInt(random(), 1, 100000);
BitSet set = new SparseFixedBitSet(maxDoc);
set.set(random().nextInt(maxDoc));
set.set(random().nextInt(maxDoc));
try (Directory dir = newDirectory()) {
doTest(set, dir);
}
}
public void testAllDocs() throws IOException {
int maxDoc = TestUtil.nextInt(random(), 1, 100000);
FixedBitSet set = new FixedBitSet(maxDoc);
set.set(1, maxDoc);
try (Directory dir = newDirectory()) {
doTest(set, dir);
}
}
public void testHalfFull() throws IOException {
int maxDoc = TestUtil.nextInt(random(), 1, 100000);
BitSet set = new SparseFixedBitSet(maxDoc);
for (int i = random().nextInt(2); i < maxDoc; i += TestUtil.nextInt(random(), 1, 3)) {
set.set(i);
}
try (Directory dir = newDirectory()) {
doTest(set, dir);
}
}
public void testDocRange() throws IOException {
try (Directory dir = newDirectory()) {
for (int iter = 0; iter < 10; ++iter) {
int maxDoc = TestUtil.nextInt(random(), 1, 1000000);
FixedBitSet set = new FixedBitSet(maxDoc);
final int start = random().nextInt(maxDoc);
final int end = TestUtil.nextInt(random(), start + 1, maxDoc);
set.set(start, end);
doTest(set, dir);
}
}
}
public void testSparseDenseBoundary() throws IOException, NoSuchFieldException, IllegalAccessException {
try (Directory dir = newDirectory()) {
FixedBitSet set = new FixedBitSet(200000);
int start = 65536 + random().nextInt(100);
final byte denseRankPower = rarely() ? -1 : (byte) (random().nextInt(7) + 7); // sane + chance of disable
// we set MAX_ARRAY_LENGTH bits so the encoding will be sparse
set.set(start, start + DISIAccumulator.MAX_ARRAY_LENGTH);
long length;
int jumpTableEntryCount;
try (IndexOutput out = dir.createOutput("sparse", IOContext.DEFAULT)) {
jumpTableEntryCount = writeJumpTable(set, DISIAccumulator.MAX_ARRAY_LENGTH, dir, out, denseRankPower);
length = out.getFilePointer();
}
try (IndexInput in = dir.openInput("sparse", IOContext.DEFAULT)) {
IndexedDISI disi = new IndexedDISI(in, 0L, length, jumpTableEntryCount, denseRankPower, DISIAccumulator.MAX_ARRAY_LENGTH);
assertEquals(start, disi.nextDoc());
if (System.getSecurityManager() == null) {
assertEquals("SPARSE", getMethodFromDISI(disi));
}
}
doTest(set, dir);
// now we set one more bit so the encoding will be dense
set.set(start + DISIAccumulator.MAX_ARRAY_LENGTH + random().nextInt(100));
try (IndexOutput out = dir.createOutput("bar", IOContext.DEFAULT)) {
writeJumpTable(set, DISIAccumulator.MAX_ARRAY_LENGTH, dir, out, denseRankPower);
length = out.getFilePointer();
}
try (IndexInput in = dir.openInput("bar", IOContext.DEFAULT)) {
IndexedDISI disi = new IndexedDISI(
in,
0L,
length,
jumpTableEntryCount,
denseRankPower,
DISIAccumulator.MAX_ARRAY_LENGTH + 1
);
assertEquals(start, disi.nextDoc());
if (System.getSecurityManager() == null) {
assertEquals("DENSE", getMethodFromDISI(disi));
}
}
doTest(set, dir);
}
}
@SuppressForbidden(reason = "access violation required in order to read private field for this test")
private static String getMethodFromDISI(Object o) throws NoSuchFieldException, IllegalAccessException {
var field = IndexedDISI.class.getDeclaredField("method");
field.setAccessible(true);
return field.get(o).toString();
}
public void testOneDocMissing() throws IOException {
int maxDoc = TestUtil.nextInt(random(), 1, 1000000);
FixedBitSet set = new FixedBitSet(maxDoc);
set.set(0, maxDoc);
set.clear(random().nextInt(maxDoc));
try (Directory dir = newDirectory()) {
doTest(set, dir);
}
}
public void testFewMissingDocs() throws IOException {
try (Directory dir = newDirectory()) {
int numIters = atLeast(10);
for (int iter = 0; iter < numIters; ++iter) {
int maxDoc = TestUtil.nextInt(random(), 1, 100000);
FixedBitSet set = new FixedBitSet(maxDoc);
set.set(0, maxDoc);
final int numMissingDocs = TestUtil.nextInt(random(), 2, 1000);
for (int i = 0; i < numMissingDocs; ++i) {
set.clear(random().nextInt(maxDoc));
}
doTest(set, dir);
}
}
}
public void testDenseMultiBlock() throws IOException {
try (Directory dir = newDirectory()) {
int maxDoc = 10 * 65536; // 10 blocks
FixedBitSet set = new FixedBitSet(maxDoc);
for (int i = 0; i < maxDoc; i += 2) { // Set every other to ensure dense
set.set(i);
}
doTest(set, dir);
}
}
public void testIllegalDenseRankPower() throws IOException {
// Legal values
for (byte denseRankPower : new byte[] { -1, 7, 8, 9, 10, 11, 12, 13, 14, 15 }) {
createAndOpenDISI(denseRankPower, denseRankPower);
}
// Illegal values
for (byte denseRankPower : new byte[] { -2, 0, 1, 6, 16 }) {
expectThrows(IllegalArgumentException.class, () -> {
createAndOpenDISI(denseRankPower, (byte) 8); // Illegal write, legal read (should not reach read)
});
expectThrows(IllegalArgumentException.class, () -> {
createAndOpenDISI((byte) 8, denseRankPower); // Legal write, illegal read (should reach read)
});
}
}
private void createAndOpenDISI(byte denseRankPowerWrite, byte denseRankPowerRead) throws IOException {
BitSet set = new FixedBitSet(10);
set.set(set.length() - 1);
try (Directory dir = newDirectory()) {
long length;
int jumpTableEntryCount = -1;
try (IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT)) {
jumpTableEntryCount = writeJumpTable(set, dir, out, denseRankPowerWrite);
length = out.getFilePointer();
}
try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) {
new IndexedDISI(in, 0L, length, jumpTableEntryCount, denseRankPowerRead, set.cardinality());
}
// This tests the legality of the denseRankPower only, so we don't do anything with the disi
}
}
public void testOneDocMissingFixed() throws IOException {
int maxDoc = 9699;
final byte denseRankPower = rarely() ? -1 : (byte) (random().nextInt(7) + 7); // sane + chance of disable
FixedBitSet set = new FixedBitSet(maxDoc);
set.set(0, maxDoc);
set.clear(1345);
try (Directory dir = newDirectory()) {
final int cardinality = set.cardinality();
long length;
int jumpTableentryCount;
try (IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT)) {
jumpTableentryCount = writeJumpTable(set, dir, out, denseRankPower);
length = out.getFilePointer();
}
int step = 16000;
try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) {
IndexedDISI disi = new IndexedDISI(in, 0L, length, jumpTableentryCount, denseRankPower, cardinality);
BitSetIterator disi2 = new BitSetIterator(set, cardinality);
assertAdvanceEquality(disi, disi2, step);
}
}
}
public void testRandom() throws IOException {
try (Directory dir = newDirectory()) {
int numIters = atLeast(3);
for (int i = 0; i < numIters; ++i) {
doTestRandom(dir);
}
}
}
private void doTestRandom(Directory dir) throws IOException {
Random random = random();
final int maxStep = TestUtil.nextInt(random, 1, 1 << TestUtil.nextInt(random, 2, 20));
final int numDocs = TestUtil.nextInt(random, 1, Math.min(100000, (Integer.MAX_VALUE - 1) / maxStep));
BitSet docs = new SparseFixedBitSet(numDocs * maxStep + 1);
int lastDoc = -1;
for (int doc = -1, i = 0; i < numDocs; ++i) {
doc += TestUtil.nextInt(random, 1, maxStep);
docs.set(doc);
lastDoc = doc;
}
final int maxDoc = lastDoc + TestUtil.nextInt(random, 1, 100);
BitSet set = BitSet.of(new BitSetIterator(docs, docs.approximateCardinality()), maxDoc);
doTest(set, dir);
}
private void doTest(BitSet set, Directory dir) throws IOException {
final int cardinality = set.cardinality();
final byte denseRankPower = rarely() ? -1 : (byte) (random().nextInt(7) + 7); // sane + chance of disable
long length;
int jumpTableentryCount;
try (IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT)) {
jumpTableentryCount = writeJumpTable(set, dir, out, denseRankPower);
length = out.getFilePointer();
}
try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) {
IndexedDISI disi = new IndexedDISI(in, 0L, length, jumpTableentryCount, denseRankPower, cardinality);
BitSetIterator disi2 = new BitSetIterator(set, cardinality);
assertSingleStepEquality(disi, disi2);
}
for (int step : new int[] { 1, 10, 100, 1000, 10000, 100000 }) {
try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) {
IndexedDISI disi = new IndexedDISI(in, 0L, length, jumpTableentryCount, denseRankPower, cardinality);
BitSetIterator disi2 = new BitSetIterator(set, cardinality);
assertAdvanceEquality(disi, disi2, step);
}
}
for (int step : new int[] { 10, 100, 1000, 10000, 100000 }) {
try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) {
IndexedDISI disi = new IndexedDISI(in, 0L, length, jumpTableentryCount, denseRankPower, cardinality);
BitSetIterator disi2 = new BitSetIterator(set, cardinality);
int disi2length = set.length();
assertAdvanceExactRandomized(disi, disi2, disi2length, step);
}
}
dir.deleteFile("foo");
}
private void assertAdvanceExactRandomized(IndexedDISI disi, BitSetIterator disi2, int disi2length, int step) throws IOException {
int index = -1;
Random random = random();
for (int target = 0; target < disi2length;) {
target += TestUtil.nextInt(random, 0, step);
int doc = disi2.docID();
while (doc < target) {
doc = disi2.nextDoc();
index++;
}
boolean exists = disi.advanceExact(target);
assertEquals(doc == target, exists);
if (exists) {
assertEquals(index, disi.index());
} else if (random.nextBoolean()) {
assertEquals(doc, disi.nextDoc());
// This is a bit strange when doc == NO_MORE_DOCS as the index overcounts in the disi2
// while-loop
assertEquals(index, disi.index());
target = doc;
}
}
}
private void assertSingleStepEquality(IndexedDISI disi, BitSetIterator disi2) throws IOException {
int i = 0;
for (int doc = disi2.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = disi2.nextDoc()) {
assertEquals(doc, disi.nextDoc());
assertEquals(i++, disi.index());
}
assertEquals(DocIdSetIterator.NO_MORE_DOCS, disi.nextDoc());
}
private void assertAdvanceEquality(IndexedDISI disi, BitSetIterator disi2, int step) throws IOException {
int index = -1;
while (true) {
int target = disi2.docID() + step;
int doc;
do {
doc = disi2.nextDoc();
index++;
} while (doc < target);
assertEquals(doc, disi.advance(target));
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
assertEquals("Expected equality using step " + step + " at docID " + doc, index, disi.index());
}
}
private static short writeJumpTable(BitSet set, Directory dir, IndexOutput out, byte denseRankPower) throws IOException {
return writeJumpTable(set, set.cardinality(), dir, out, denseRankPower);
}
private static short writeJumpTable(BitSet set, long cost, Directory dir, IndexOutput out, byte denseRankPower) throws IOException {
var disiAccumulator = new DISIAccumulator(dir, IOContext.DEFAULT, out, denseRankPower);
var iterator = new BitSetIterator(set, cost);
for (int docId = iterator.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = iterator.nextDoc()) {
disiAccumulator.addDocId(docId);
}
return disiAccumulator.build(out);
}
}