mirror of
https://github.com/elastic/elasticsearch.git
synced 2025-06-28 09:28:55 -04:00
Fix ES818BinaryQuantizedVectorsReader to not use directIO during merge (#130114)
This commit fixes the BBQ reader to **not** use directIO when merging the original float vectors.
This commit is contained in:
parent
ff65fd1133
commit
93e4e01277
3 changed files with 65 additions and 1 deletion
|
@ -65,7 +65,7 @@ public class ES818BinaryQuantizedVectorsReader extends FlatVectorsReader impleme
|
||||||
|
|
||||||
private static final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(ES818BinaryQuantizedVectorsReader.class);
|
private static final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(ES818BinaryQuantizedVectorsReader.class);
|
||||||
|
|
||||||
private final Map<String, FieldEntry> fields = new HashMap<>();
|
private final Map<String, FieldEntry> fields;
|
||||||
private final IndexInput quantizedVectorData;
|
private final IndexInput quantizedVectorData;
|
||||||
private final FlatVectorsReader rawVectorsReader;
|
private final FlatVectorsReader rawVectorsReader;
|
||||||
private final ES818BinaryFlatVectorsScorer vectorScorer;
|
private final ES818BinaryFlatVectorsScorer vectorScorer;
|
||||||
|
@ -77,6 +77,7 @@ public class ES818BinaryQuantizedVectorsReader extends FlatVectorsReader impleme
|
||||||
ES818BinaryFlatVectorsScorer vectorsScorer
|
ES818BinaryFlatVectorsScorer vectorsScorer
|
||||||
) throws IOException {
|
) throws IOException {
|
||||||
super(vectorsScorer);
|
super(vectorsScorer);
|
||||||
|
this.fields = new HashMap<>();
|
||||||
this.vectorScorer = vectorsScorer;
|
this.vectorScorer = vectorsScorer;
|
||||||
this.rawVectorsReader = rawVectorsReader;
|
this.rawVectorsReader = rawVectorsReader;
|
||||||
int versionMeta = -1;
|
int versionMeta = -1;
|
||||||
|
@ -120,6 +121,24 @@ public class ES818BinaryQuantizedVectorsReader extends FlatVectorsReader impleme
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Copy constructor: creates a reader that shares the state of {@code clone} but reads raw
 * vectors through the supplied {@link FlatVectorsReader}.
 *
 * @param clone            reader whose scorer, quantized vector input and field map are reused as-is
 * @param rawVectorsReader raw vectors reader the new instance delegates to
 */
private ES818BinaryQuantizedVectorsReader(ES818BinaryQuantizedVectorsReader clone, FlatVectorsReader rawVectorsReader) {
    super(clone.vectorScorer);
    // Shared by reference with the source reader; nothing is copied or re-opened here.
    this.fields = clone.fields;
    this.quantizedVectorData = clone.quantizedVectorData;
    this.vectorScorer = clone.vectorScorer;
    // The only piece of state that differs from the source reader.
    this.rawVectorsReader = rawVectorsReader;
}
|
||||||
|
|
||||||
|
// For testing
|
||||||
|
FlatVectorsReader getRawVectorsReader() {
|
||||||
|
return rawVectorsReader;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public FlatVectorsReader getMergeInstance() {
|
||||||
|
return new ES818BinaryQuantizedVectorsReader(this, rawVectorsReader.getMergeInstance());
|
||||||
|
}
|
||||||
|
|
||||||
private void readFields(ChecksumIndexInput meta, FieldInfos infos) throws IOException {
|
private void readFields(ChecksumIndexInput meta, FieldInfos infos) throws IOException {
|
||||||
for (int fieldNumber = meta.readInt(); fieldNumber != -1; fieldNumber = meta.readInt()) {
|
for (int fieldNumber = meta.readInt(); fieldNumber != -1; fieldNumber = meta.readInt()) {
|
||||||
FieldInfo info = infos.fieldInfo(fieldNumber);
|
FieldInfo info = infos.fieldInfo(fieldNumber);
|
||||||
|
|
|
@ -36,6 +36,11 @@ class MergeReaderWrapper extends FlatVectorsReader implements OffHeapStats {
|
||||||
this.mergeReader = mergeReader;
|
this.mergeReader = mergeReader;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// For testing
|
||||||
|
FlatVectorsReader getMainReader() {
|
||||||
|
return mainReader;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public RandomVectorScorer getRandomVectorScorer(String field, float[] target) throws IOException {
|
public RandomVectorScorer getRandomVectorScorer(String field, float[] target) throws IOException {
|
||||||
return mainReader.getRandomVectorScorer(field, target);
|
return mainReader.getRandomVectorScorer(field, target);
|
||||||
|
|
|
@ -23,6 +23,7 @@ import org.apache.lucene.codecs.Codec;
|
||||||
import org.apache.lucene.codecs.FilterCodec;
|
import org.apache.lucene.codecs.FilterCodec;
|
||||||
import org.apache.lucene.codecs.KnnVectorsFormat;
|
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||||
import org.apache.lucene.codecs.KnnVectorsReader;
|
import org.apache.lucene.codecs.KnnVectorsReader;
|
||||||
|
import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsReader;
|
||||||
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
|
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
|
@ -35,6 +36,7 @@ import org.apache.lucene.index.IndexWriter;
|
||||||
import org.apache.lucene.index.IndexWriterConfig;
|
import org.apache.lucene.index.IndexWriterConfig;
|
||||||
import org.apache.lucene.index.KnnVectorValues;
|
import org.apache.lucene.index.KnnVectorValues;
|
||||||
import org.apache.lucene.index.LeafReader;
|
import org.apache.lucene.index.LeafReader;
|
||||||
|
import org.apache.lucene.index.SegmentReader;
|
||||||
import org.apache.lucene.index.SoftDeletesRetentionMergePolicy;
|
import org.apache.lucene.index.SoftDeletesRetentionMergePolicy;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.VectorSimilarityFunction;
|
import org.apache.lucene.index.VectorSimilarityFunction;
|
||||||
|
@ -83,6 +85,7 @@ import java.util.OptionalLong;
|
||||||
import static java.lang.String.format;
|
import static java.lang.String.format;
|
||||||
import static org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT;
|
import static org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT;
|
||||||
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
|
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
|
||||||
|
import static org.hamcrest.Matchers.instanceOf;
|
||||||
import static org.hamcrest.Matchers.is;
|
import static org.hamcrest.Matchers.is;
|
||||||
import static org.hamcrest.Matchers.oneOf;
|
import static org.hamcrest.Matchers.oneOf;
|
||||||
|
|
||||||
|
@ -309,6 +312,43 @@ public class ES818BinaryQuantizedVectorsFormatTests extends BaseKnnVectorsFormat
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testMergeInstance() throws IOException {
|
||||||
|
checkDirectIOSupported();
|
||||||
|
float[] vector = randomVector(10);
|
||||||
|
VectorSimilarityFunction similarityFunction = randomSimilarity();
|
||||||
|
KnnFloatVectorField knnField = new KnnFloatVectorField("field", vector, similarityFunction);
|
||||||
|
try (Directory dir = newFSDirectory()) {
|
||||||
|
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setUseCompoundFile(false))) {
|
||||||
|
Document doc = new Document();
|
||||||
|
knnField.setVectorValue(randomVector(10));
|
||||||
|
doc.add(knnField);
|
||||||
|
w.addDocument(doc);
|
||||||
|
w.commit();
|
||||||
|
|
||||||
|
try (IndexReader reader = DirectoryReader.open(w)) {
|
||||||
|
SegmentReader r = (SegmentReader) getOnlyLeafReader(reader);
|
||||||
|
assertThat(unwrapRawVectorReader("field", r.getVectorReader()), instanceOf(DirectIOLucene99FlatVectorsReader.class));
|
||||||
|
assertThat(
|
||||||
|
unwrapRawVectorReader("field", r.getVectorReader().getMergeInstance()),
|
||||||
|
instanceOf(Lucene99FlatVectorsReader.class)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static KnnVectorsReader unwrapRawVectorReader(String fieldName, KnnVectorsReader knnReader) {
|
||||||
|
if (knnReader instanceof PerFieldKnnVectorsFormat.FieldsReader perField) {
|
||||||
|
return unwrapRawVectorReader(fieldName, perField.getFieldReader(fieldName));
|
||||||
|
} else if (knnReader instanceof ES818BinaryQuantizedVectorsReader bbqReader) {
|
||||||
|
return unwrapRawVectorReader(fieldName, bbqReader.getRawVectorsReader());
|
||||||
|
} else if (knnReader instanceof MergeReaderWrapper mergeReaderWrapper) {
|
||||||
|
return unwrapRawVectorReader(fieldName, mergeReaderWrapper.getMainReader());
|
||||||
|
} else {
|
||||||
|
return knnReader;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static Directory newMMapDirectory() throws IOException {
|
static Directory newMMapDirectory() throws IOException {
|
||||||
Directory dir = new MMapDirectory(createTempDir("ES818BinaryQuantizedVectorsFormatTests"));
|
Directory dir = new MMapDirectory(createTempDir("ES818BinaryQuantizedVectorsFormatTests"));
|
||||||
if (random().nextBoolean()) {
|
if (random().nextBoolean()) {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue