mirror of
https://github.com/elastic/elasticsearch.git
synced 2025-06-28 17:34:17 -04:00
Switch IVF Writer to ES Logger (#129224)
update to use ES logger instead of infostream and fixing native access warnings
This commit is contained in:
parent
916cd052bd
commit
be703a034f
9 changed files with 51 additions and 105 deletions
|
@ -7,6 +7,8 @@
|
||||||
* License v3.0 only", or the "Server Side Public License, v 1".
|
* License v3.0 only", or the "Server Side Public License, v 1".
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
import org.elasticsearch.gradle.internal.test.TestUtil
|
||||||
|
|
||||||
apply plugin: 'elasticsearch.java'
|
apply plugin: 'elasticsearch.java'
|
||||||
apply plugin: 'elasticsearch.build'
|
apply plugin: 'elasticsearch.build'
|
||||||
|
|
||||||
|
@ -23,6 +25,8 @@ dependencies {
|
||||||
api "org.apache.lucene:lucene-core:${versions.lucene}"
|
api "org.apache.lucene:lucene-core:${versions.lucene}"
|
||||||
api "org.apache.lucene:lucene-queries:${versions.lucene}"
|
api "org.apache.lucene:lucene-queries:${versions.lucene}"
|
||||||
api "org.apache.lucene:lucene-codecs:${versions.lucene}"
|
api "org.apache.lucene:lucene-codecs:${versions.lucene}"
|
||||||
|
implementation project(':libs:simdvec')
|
||||||
|
implementation project(':libs:native')
|
||||||
implementation project(':libs:logging')
|
implementation project(':libs:logging')
|
||||||
implementation project(':server')
|
implementation project(':server')
|
||||||
}
|
}
|
||||||
|
@ -37,6 +41,7 @@ tasks.register("checkVec", JavaExec) {
|
||||||
// Configure logging to console
|
// Configure logging to console
|
||||||
systemProperty "es.logger.out", "console"
|
systemProperty "es.logger.out", "console"
|
||||||
systemProperty "es.logger.level", "INFO" // Change to DEBUG if needed
|
systemProperty "es.logger.level", "INFO" // Change to DEBUG if needed
|
||||||
|
systemProperty 'es.nativelibs.path', TestUtil.getTestLibraryPath(file("../../libs/native/libraries/build/platform/").toString())
|
||||||
|
|
||||||
if (buildParams.getRuntimeJavaVersion().map { it.majorVersion.toInteger() }.get() >= 21) {
|
if (buildParams.getRuntimeJavaVersion().map { it.majorVersion.toInteger() }.get() >= 21) {
|
||||||
jvmArgs '-Xms4g', '-Xmx4g', '--add-modules=jdk.incubator.vector', '--enable-native-access=ALL-UNNAMED', '-Djava.util.concurrent.ForkJoinPool.common.parallelism=8', '-XX:+UnlockDiagnosticVMOptions', '-XX:+DebugNonSafepoints', '-XX:+HeapDumpOnOutOfMemoryError'
|
jvmArgs '-Xms4g', '-Xmx4g', '--add-modules=jdk.incubator.vector', '--enable-native-access=ALL-UNNAMED', '-Djava.util.concurrent.ForkJoinPool.common.parallelism=8', '-XX:+UnlockDiagnosticVMOptions', '-XX:+DebugNonSafepoints', '-XX:+HeapDumpOnOutOfMemoryError'
|
||||||
|
|
|
@ -11,6 +11,7 @@ module org.elasticsearch.test.knn {
|
||||||
requires org.elasticsearch.base;
|
requires org.elasticsearch.base;
|
||||||
requires org.elasticsearch.server;
|
requires org.elasticsearch.server;
|
||||||
requires org.elasticsearch.xcontent;
|
requires org.elasticsearch.xcontent;
|
||||||
|
requires org.elasticsearch.cli;
|
||||||
requires org.apache.lucene.core;
|
requires org.apache.lucene.core;
|
||||||
requires org.apache.lucene.codecs;
|
requires org.apache.lucene.codecs;
|
||||||
requires org.apache.lucene.queries;
|
requires org.apache.lucene.queries;
|
||||||
|
|
|
@ -15,8 +15,10 @@ import org.apache.lucene.codecs.Codec;
|
||||||
import org.apache.lucene.codecs.KnnVectorsFormat;
|
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||||
import org.apache.lucene.codecs.lucene101.Lucene101Codec;
|
import org.apache.lucene.codecs.lucene101.Lucene101Codec;
|
||||||
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
|
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
|
||||||
|
import org.elasticsearch.cli.ProcessInfo;
|
||||||
import org.elasticsearch.common.Strings;
|
import org.elasticsearch.common.Strings;
|
||||||
import org.elasticsearch.common.logging.LogConfigurator;
|
import org.elasticsearch.common.logging.LogConfigurator;
|
||||||
|
import org.elasticsearch.common.settings.Settings;
|
||||||
import org.elasticsearch.core.PathUtils;
|
import org.elasticsearch.core.PathUtils;
|
||||||
import org.elasticsearch.index.codec.vectors.ES813Int8FlatVectorFormat;
|
import org.elasticsearch.index.codec.vectors.ES813Int8FlatVectorFormat;
|
||||||
import org.elasticsearch.index.codec.vectors.ES814HnswScalarQuantizedVectorsFormat;
|
import org.elasticsearch.index.codec.vectors.ES814HnswScalarQuantizedVectorsFormat;
|
||||||
|
@ -24,6 +26,8 @@ import org.elasticsearch.index.codec.vectors.IVFVectorsFormat;
|
||||||
import org.elasticsearch.index.codec.vectors.es818.ES818BinaryQuantizedVectorsFormat;
|
import org.elasticsearch.index.codec.vectors.es818.ES818BinaryQuantizedVectorsFormat;
|
||||||
import org.elasticsearch.index.codec.vectors.es818.ES818HnswBinaryQuantizedVectorsFormat;
|
import org.elasticsearch.index.codec.vectors.es818.ES818HnswBinaryQuantizedVectorsFormat;
|
||||||
import org.elasticsearch.logging.Level;
|
import org.elasticsearch.logging.Level;
|
||||||
|
import org.elasticsearch.logging.LogManager;
|
||||||
|
import org.elasticsearch.logging.Logger;
|
||||||
import org.elasticsearch.xcontent.XContentParser;
|
import org.elasticsearch.xcontent.XContentParser;
|
||||||
import org.elasticsearch.xcontent.XContentParserConfiguration;
|
import org.elasticsearch.xcontent.XContentParserConfiguration;
|
||||||
import org.elasticsearch.xcontent.XContentType;
|
import org.elasticsearch.xcontent.XContentType;
|
||||||
|
@ -35,19 +39,26 @@ import java.nio.file.Path;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A utility class to create and test KNN indices using Lucene.
|
* A utility class to create and test KNN indices using Lucene.
|
||||||
* It supports various index types (HNSW, FLAT, IVF) and configurations.
|
* It supports various index types (HNSW, FLAT, IVF) and configurations.
|
||||||
*/
|
*/
|
||||||
public class KnnIndexTester {
|
public class KnnIndexTester {
|
||||||
static final Level LOG_LEVEL = Level.DEBUG;
|
static final Logger logger;
|
||||||
|
|
||||||
static final SysOutLogger logger = new SysOutLogger();
|
|
||||||
|
|
||||||
static {
|
static {
|
||||||
LogConfigurator.loadLog4jPlugins();
|
LogConfigurator.loadLog4jPlugins();
|
||||||
LogConfigurator.configureESLogging(); // native access requires logging to be initialized
|
|
||||||
|
// necessary otherwise the es.logger.level system configuration in build.gradle is ignored
|
||||||
|
ProcessInfo pinfo = ProcessInfo.fromSystem();
|
||||||
|
Map<String, String> sysprops = pinfo.sysprops();
|
||||||
|
String loggerLevel = sysprops.getOrDefault("es.logger.level", Level.INFO.name());
|
||||||
|
Settings settings = Settings.builder().put("logger.level", loggerLevel).build();
|
||||||
|
LogConfigurator.configureWithoutConfig(settings);
|
||||||
|
|
||||||
|
logger = LogManager.getLogger(KnnIndexTester.class);
|
||||||
}
|
}
|
||||||
|
|
||||||
static final String INDEX_DIR = "target/knn_index";
|
static final String INDEX_DIR = "target/knn_index";
|
||||||
|
@ -163,7 +174,7 @@ public class KnnIndexTester {
|
||||||
FormattedResults formattedResults = new FormattedResults();
|
FormattedResults formattedResults = new FormattedResults();
|
||||||
for (CmdLineArgs cmdLineArgs : cmdLineArgsList) {
|
for (CmdLineArgs cmdLineArgs : cmdLineArgsList) {
|
||||||
Results result = new Results(cmdLineArgs.indexType().name().toLowerCase(Locale.ROOT), cmdLineArgs.numDocs());
|
Results result = new Results(cmdLineArgs.indexType().name().toLowerCase(Locale.ROOT), cmdLineArgs.numDocs());
|
||||||
System.out.println("Running KNN index tester with arguments: " + cmdLineArgs);
|
logger.info("Running KNN index tester with arguments: " + cmdLineArgs);
|
||||||
Codec codec = createCodec(cmdLineArgs);
|
Codec codec = createCodec(cmdLineArgs);
|
||||||
Path indexPath = PathUtils.get(formatIndexPath(cmdLineArgs));
|
Path indexPath = PathUtils.get(formatIndexPath(cmdLineArgs));
|
||||||
if (cmdLineArgs.reindex() || cmdLineArgs.forceMerge()) {
|
if (cmdLineArgs.reindex() || cmdLineArgs.forceMerge()) {
|
||||||
|
@ -195,8 +206,7 @@ public class KnnIndexTester {
|
||||||
}
|
}
|
||||||
formattedResults.results.add(result);
|
formattedResults.results.add(result);
|
||||||
}
|
}
|
||||||
System.out.println("Results:");
|
logger.info("Results: \n" + formattedResults);
|
||||||
System.out.println(formattedResults);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static class FormattedResults {
|
static class FormattedResults {
|
||||||
|
@ -326,57 +336,6 @@ public class KnnIndexTester {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static final class SysOutLogger {
|
|
||||||
|
|
||||||
void warn(String message) {
|
|
||||||
if (LOG_LEVEL.ordinal() >= Level.WARN.ordinal()) {
|
|
||||||
System.out.println(message);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void warn(String message, Object... params) {
|
|
||||||
if (LOG_LEVEL.ordinal() >= Level.WARN.ordinal()) {
|
|
||||||
System.out.println(String.format(Locale.ROOT, message, params));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void info(String message) {
|
|
||||||
if (LOG_LEVEL.ordinal() >= Level.INFO.ordinal()) {
|
|
||||||
System.out.println(message);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void info(String message, Object... params) {
|
|
||||||
if (LOG_LEVEL.ordinal() >= Level.INFO.ordinal()) {
|
|
||||||
System.out.println(String.format(Locale.ROOT, message, params));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void debug(String message) {
|
|
||||||
if (LOG_LEVEL.ordinal() >= Level.DEBUG.ordinal()) {
|
|
||||||
System.out.println(message);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void debug(String message, Object... params) {
|
|
||||||
if (LOG_LEVEL.ordinal() >= Level.DEBUG.ordinal()) {
|
|
||||||
System.out.println(String.format(Locale.ROOT, message, params));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void trace(String message) {
|
|
||||||
if (LOG_LEVEL == Level.TRACE) {
|
|
||||||
System.out.println(message);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void trace(String message, Object... params) {
|
|
||||||
if (LOG_LEVEL == Level.TRACE) {
|
|
||||||
System.out.println(String.format(Locale.ROOT, message, params));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static final class ThreadDetails {
|
static final class ThreadDetails {
|
||||||
private static final ThreadMXBean threadBean = (ThreadMXBean) java.lang.management.ManagementFactory.getThreadMXBean();
|
private static final ThreadMXBean threadBean = (ThreadMXBean) java.lang.management.ManagementFactory.getThreadMXBean();
|
||||||
public final long[] threadIDs;
|
public final long[] threadIDs;
|
||||||
|
|
|
@ -117,7 +117,7 @@ class KnnIndexer {
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"KnnIndexer: using codec=%s, vectorEncoding=%s, dim=%d, similarityFunction=%s",
|
"KnnIndexer: using codec={}, vectorEncoding={}, dim={}, similarityFunction={}",
|
||||||
codec.getName(),
|
codec.getName(),
|
||||||
vectorEncoding,
|
vectorEncoding,
|
||||||
dim,
|
dim,
|
||||||
|
@ -125,7 +125,7 @@ class KnnIndexer {
|
||||||
);
|
);
|
||||||
|
|
||||||
if (Files.exists(indexPath)) {
|
if (Files.exists(indexPath)) {
|
||||||
logger.debug("KnnIndexer: existing index at %s", indexPath);
|
logger.debug("KnnIndexer: existing index at {}", indexPath);
|
||||||
} else {
|
} else {
|
||||||
Files.createDirectories(indexPath);
|
Files.createDirectories(indexPath);
|
||||||
}
|
}
|
||||||
|
@ -143,7 +143,7 @@ class KnnIndexer {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
logger.info(
|
logger.info(
|
||||||
"docsPathSizeInBytes=%d, dim=%d, vectorEncoding=%s, byteSize=%d",
|
"docsPathSizeInBytes={}, dim={}, vectorEncoding={}, byteSize={}",
|
||||||
docsPathSizeInBytes,
|
docsPathSizeInBytes,
|
||||||
dim,
|
dim,
|
||||||
vectorEncoding,
|
vectorEncoding,
|
||||||
|
@ -170,7 +170,7 @@ class KnnIndexer {
|
||||||
}
|
}
|
||||||
|
|
||||||
long elapsed = System.nanoTime() - start;
|
long elapsed = System.nanoTime() - start;
|
||||||
logger.debug("Indexing took %d ms for %d docs", TimeUnit.NANOSECONDS.toMillis(elapsed), numDocs);
|
logger.debug("Indexing took {} ms for {} docs", TimeUnit.NANOSECONDS.toMillis(elapsed), numDocs);
|
||||||
result.indexTimeMS = TimeUnit.NANOSECONDS.toMillis(elapsed);
|
result.indexTimeMS = TimeUnit.NANOSECONDS.toMillis(elapsed);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -183,14 +183,14 @@ class KnnIndexer {
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
iwc.setCodec(codec);
|
iwc.setCodec(codec);
|
||||||
logger.debug("KnnIndexer: forceMerge in %s", indexPath);
|
logger.debug("KnnIndexer: forceMerge in {}", indexPath);
|
||||||
long startNS = System.nanoTime();
|
long startNS = System.nanoTime();
|
||||||
try (IndexWriter iw = new IndexWriter(FSDirectory.open(indexPath), iwc)) {
|
try (IndexWriter iw = new IndexWriter(FSDirectory.open(indexPath), iwc)) {
|
||||||
iw.forceMerge(1);
|
iw.forceMerge(1);
|
||||||
}
|
}
|
||||||
long endNS = System.nanoTime();
|
long endNS = System.nanoTime();
|
||||||
long elapsedNSec = (endNS - startNS);
|
long elapsedNSec = (endNS - startNS);
|
||||||
logger.info("forceMerge took %d ms", TimeUnit.NANOSECONDS.toMillis(elapsedNSec));
|
logger.info("forceMerge took {} ms", TimeUnit.NANOSECONDS.toMillis(elapsedNSec));
|
||||||
results.forceMergeTimeMS = TimeUnit.NANOSECONDS.toMillis(elapsedNSec);
|
results.forceMergeTimeMS = TimeUnit.NANOSECONDS.toMillis(elapsedNSec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -181,7 +181,7 @@ class KnnSearcher {
|
||||||
resultIds[i] = getResultIds(results[i], storedFields);
|
resultIds[i] = getResultIds(results[i], storedFields);
|
||||||
}
|
}
|
||||||
logger.info(
|
logger.info(
|
||||||
"completed %d searches in %d ms: %d QPS CPU time=%dms",
|
"completed {} searches in {} ms: {} QPS CPU time={}ms",
|
||||||
numQueryVectors,
|
numQueryVectors,
|
||||||
elapsed,
|
elapsed,
|
||||||
(1000L * numQueryVectors) / elapsed,
|
(1000L * numQueryVectors) / elapsed,
|
||||||
|
|
|
@ -17,11 +17,12 @@ import org.apache.lucene.index.SegmentWriteState;
|
||||||
import org.apache.lucene.internal.hppc.IntArrayList;
|
import org.apache.lucene.internal.hppc.IntArrayList;
|
||||||
import org.apache.lucene.store.IndexInput;
|
import org.apache.lucene.store.IndexInput;
|
||||||
import org.apache.lucene.store.IndexOutput;
|
import org.apache.lucene.store.IndexOutput;
|
||||||
import org.apache.lucene.util.InfoStream;
|
|
||||||
import org.apache.lucene.util.VectorUtil;
|
import org.apache.lucene.util.VectorUtil;
|
||||||
import org.apache.lucene.util.quantization.OptimizedScalarQuantizer;
|
import org.apache.lucene.util.quantization.OptimizedScalarQuantizer;
|
||||||
import org.elasticsearch.index.codec.vectors.cluster.HierarchicalKMeans;
|
import org.elasticsearch.index.codec.vectors.cluster.HierarchicalKMeans;
|
||||||
import org.elasticsearch.index.codec.vectors.cluster.KMeansResult;
|
import org.elasticsearch.index.codec.vectors.cluster.KMeansResult;
|
||||||
|
import org.elasticsearch.logging.LogManager;
|
||||||
|
import org.elasticsearch.logging.Logger;
|
||||||
import org.elasticsearch.simdvec.ES91OSQVectorsScorer;
|
import org.elasticsearch.simdvec.ES91OSQVectorsScorer;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
@ -31,7 +32,6 @@ import java.nio.ByteOrder;
|
||||||
import static org.apache.lucene.codecs.lucene102.Lucene102BinaryQuantizedVectorsFormat.INDEX_BITS;
|
import static org.apache.lucene.codecs.lucene102.Lucene102BinaryQuantizedVectorsFormat.INDEX_BITS;
|
||||||
import static org.apache.lucene.util.quantization.OptimizedScalarQuantizer.discretize;
|
import static org.apache.lucene.util.quantization.OptimizedScalarQuantizer.discretize;
|
||||||
import static org.apache.lucene.util.quantization.OptimizedScalarQuantizer.packAsBinary;
|
import static org.apache.lucene.util.quantization.OptimizedScalarQuantizer.packAsBinary;
|
||||||
import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.IVF_VECTOR_COMPONENT;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Default implementation of {@link IVFVectorsWriter}. It uses {@link HierarchicalKMeans} algorithm to
|
* Default implementation of {@link IVFVectorsWriter}. It uses {@link HierarchicalKMeans} algorithm to
|
||||||
|
@ -39,6 +39,7 @@ import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.IVF_VECTOR_
|
||||||
* fashion.
|
* fashion.
|
||||||
*/
|
*/
|
||||||
public class DefaultIVFVectorsWriter extends IVFVectorsWriter {
|
public class DefaultIVFVectorsWriter extends IVFVectorsWriter {
|
||||||
|
private static final Logger logger = LogManager.getLogger(DefaultIVFVectorsWriter.class);
|
||||||
|
|
||||||
private final int vectorPerCluster;
|
private final int vectorPerCluster;
|
||||||
|
|
||||||
|
@ -53,7 +54,6 @@ public class DefaultIVFVectorsWriter extends IVFVectorsWriter {
|
||||||
CentroidSupplier centroidSupplier,
|
CentroidSupplier centroidSupplier,
|
||||||
FloatVectorValues floatVectorValues,
|
FloatVectorValues floatVectorValues,
|
||||||
IndexOutput postingsOutput,
|
IndexOutput postingsOutput,
|
||||||
InfoStream infoStream,
|
|
||||||
IntArrayList[] assignmentsByCluster
|
IntArrayList[] assignmentsByCluster
|
||||||
) throws IOException {
|
) throws IOException {
|
||||||
// write the posting lists
|
// write the posting lists
|
||||||
|
@ -79,14 +79,14 @@ public class DefaultIVFVectorsWriter extends IVFVectorsWriter {
|
||||||
writePostingList(cluster, postingsOutput, binarizedByteVectorValues);
|
writePostingList(cluster, postingsOutput, binarizedByteVectorValues);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (infoStream.isEnabled(IVF_VECTOR_COMPONENT)) {
|
if (logger.isDebugEnabled()) {
|
||||||
printClusterQualityStatistics(assignmentsByCluster, infoStream);
|
printClusterQualityStatistics(assignmentsByCluster);
|
||||||
}
|
}
|
||||||
|
|
||||||
return offsets;
|
return offsets;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void printClusterQualityStatistics(IntArrayList[] clusters, InfoStream infoStream) {
|
private static void printClusterQualityStatistics(IntArrayList[] clusters) {
|
||||||
float min = Float.MAX_VALUE;
|
float min = Float.MAX_VALUE;
|
||||||
float max = Float.MIN_VALUE;
|
float max = Float.MIN_VALUE;
|
||||||
float mean = 0;
|
float mean = 0;
|
||||||
|
@ -105,20 +105,14 @@ public class DefaultIVFVectorsWriter extends IVFVectorsWriter {
|
||||||
max = Math.max(max, cluster.size());
|
max = Math.max(max, cluster.size());
|
||||||
}
|
}
|
||||||
float variance = m2 / (clusters.length - 1);
|
float variance = m2 / (clusters.length - 1);
|
||||||
infoStream.message(
|
logger.debug(
|
||||||
IVF_VECTOR_COMPONENT,
|
"Centroid count: {} min: {} max: {} mean: {} stdDev: {} variance: {}",
|
||||||
"Centroid count: "
|
clusters.length,
|
||||||
+ clusters.length
|
min,
|
||||||
+ " min: "
|
max,
|
||||||
+ min
|
mean,
|
||||||
+ " max: "
|
Math.sqrt(variance),
|
||||||
+ max
|
variance
|
||||||
+ " mean: "
|
|
||||||
+ mean
|
|
||||||
+ " stdDev: "
|
|
||||||
+ Math.sqrt(variance)
|
|
||||||
+ " variance: "
|
|
||||||
+ variance
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -208,17 +202,16 @@ public class DefaultIVFVectorsWriter extends IVFVectorsWriter {
|
||||||
float[] globalCentroid
|
float[] globalCentroid
|
||||||
) throws IOException {
|
) throws IOException {
|
||||||
// TODO: take advantage of prior generated clusters from mergeState in the future
|
// TODO: take advantage of prior generated clusters from mergeState in the future
|
||||||
return calculateAndWriteCentroids(fieldInfo, floatVectorValues, centroidOutput, mergeState.infoStream, globalCentroid, false);
|
return calculateAndWriteCentroids(fieldInfo, floatVectorValues, centroidOutput, globalCentroid, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
CentroidAssignments calculateAndWriteCentroids(
|
CentroidAssignments calculateAndWriteCentroids(
|
||||||
FieldInfo fieldInfo,
|
FieldInfo fieldInfo,
|
||||||
FloatVectorValues floatVectorValues,
|
FloatVectorValues floatVectorValues,
|
||||||
IndexOutput centroidOutput,
|
IndexOutput centroidOutput,
|
||||||
InfoStream infoStream,
|
|
||||||
float[] globalCentroid
|
float[] globalCentroid
|
||||||
) throws IOException {
|
) throws IOException {
|
||||||
return calculateAndWriteCentroids(fieldInfo, floatVectorValues, centroidOutput, infoStream, globalCentroid, true);
|
return calculateAndWriteCentroids(fieldInfo, floatVectorValues, centroidOutput, globalCentroid, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -228,7 +221,6 @@ public class DefaultIVFVectorsWriter extends IVFVectorsWriter {
|
||||||
* @param fieldInfo merging field info
|
* @param fieldInfo merging field info
|
||||||
* @param floatVectorValues the float vector values to merge
|
* @param floatVectorValues the float vector values to merge
|
||||||
* @param centroidOutput the centroid output
|
* @param centroidOutput the centroid output
|
||||||
* @param infoStream the merge state
|
|
||||||
* @param globalCentroid the global centroid, calculated by this method and used to quantize the centroids
|
* @param globalCentroid the global centroid, calculated by this method and used to quantize the centroids
|
||||||
* @param cacheCentroids whether the centroids are kept or discarded once computed
|
* @param cacheCentroids whether the centroids are kept or discarded once computed
|
||||||
* @return the vector assignments, soar assignments, and if asked the centroids themselves that were computed
|
* @return the vector assignments, soar assignments, and if asked the centroids themselves that were computed
|
||||||
|
@ -238,7 +230,6 @@ public class DefaultIVFVectorsWriter extends IVFVectorsWriter {
|
||||||
FieldInfo fieldInfo,
|
FieldInfo fieldInfo,
|
||||||
FloatVectorValues floatVectorValues,
|
FloatVectorValues floatVectorValues,
|
||||||
IndexOutput centroidOutput,
|
IndexOutput centroidOutput,
|
||||||
InfoStream infoStream,
|
|
||||||
float[] globalCentroid,
|
float[] globalCentroid,
|
||||||
boolean cacheCentroids
|
boolean cacheCentroids
|
||||||
) throws IOException {
|
) throws IOException {
|
||||||
|
@ -266,12 +257,9 @@ public class DefaultIVFVectorsWriter extends IVFVectorsWriter {
|
||||||
// write centroids
|
// write centroids
|
||||||
writeCentroids(centroids, fieldInfo, globalCentroid, centroidOutput);
|
writeCentroids(centroids, fieldInfo, globalCentroid, centroidOutput);
|
||||||
|
|
||||||
if (infoStream.isEnabled(IVF_VECTOR_COMPONENT)) {
|
if (logger.isDebugEnabled()) {
|
||||||
infoStream.message(
|
logger.debug("calculate centroids and assign vectors time ms: {}", (System.nanoTime() - nanoTime) / 1000000.0);
|
||||||
IVF_VECTOR_COMPONENT,
|
logger.debug("final centroid count: {}", centroids.length);
|
||||||
"calculate centroids and assign vectors time ms: " + ((System.nanoTime() - nanoTime) / 1000000.0)
|
|
||||||
);
|
|
||||||
infoStream.message(IVF_VECTOR_COMPONENT, "final centroid count: " + centroids.length);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
IntArrayList[] assignmentsByCluster = new IntArrayList[centroids.length];
|
IntArrayList[] assignmentsByCluster = new IntArrayList[centroids.length];
|
||||||
|
|
|
@ -242,8 +242,8 @@ public class ES814ScalarQuantizedVectorsFormat extends FlatVectorsFormat {
|
||||||
final FlatVectorsScorer delegate;
|
final FlatVectorsScorer delegate;
|
||||||
final VectorScorerFactory factory;
|
final VectorScorerFactory factory;
|
||||||
|
|
||||||
ESFlatVectorsScorer(FlatVectorsScorer delegte) {
|
ESFlatVectorsScorer(FlatVectorsScorer delegate) {
|
||||||
this.delegate = delegte;
|
this.delegate = delegate;
|
||||||
factory = VectorScorerFactory.instance().orElse(null);
|
factory = VectorScorerFactory.instance().orElse(null);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -45,7 +45,6 @@ import java.io.IOException;
|
||||||
*/
|
*/
|
||||||
public class IVFVectorsFormat extends KnnVectorsFormat {
|
public class IVFVectorsFormat extends KnnVectorsFormat {
|
||||||
|
|
||||||
public static final String IVF_VECTOR_COMPONENT = "IVF";
|
|
||||||
public static final String NAME = "IVFVectorsFormat";
|
public static final String NAME = "IVFVectorsFormat";
|
||||||
// centroid ordinals -> centroid values, offsets
|
// centroid ordinals -> centroid values, offsets
|
||||||
public static final String CENTROID_EXTENSION = "cenivf";
|
public static final String CENTROID_EXTENSION = "cenivf";
|
||||||
|
|
|
@ -28,7 +28,6 @@ import org.apache.lucene.search.DocIdSetIterator;
|
||||||
import org.apache.lucene.store.IOContext;
|
import org.apache.lucene.store.IOContext;
|
||||||
import org.apache.lucene.store.IndexInput;
|
import org.apache.lucene.store.IndexInput;
|
||||||
import org.apache.lucene.store.IndexOutput;
|
import org.apache.lucene.store.IndexOutput;
|
||||||
import org.apache.lucene.util.InfoStream;
|
|
||||||
import org.apache.lucene.util.VectorUtil;
|
import org.apache.lucene.util.VectorUtil;
|
||||||
import org.elasticsearch.core.IOUtils;
|
import org.elasticsearch.core.IOUtils;
|
||||||
import org.elasticsearch.core.SuppressForbidden;
|
import org.elasticsearch.core.SuppressForbidden;
|
||||||
|
@ -134,7 +133,6 @@ public abstract class IVFVectorsWriter extends KnnVectorsWriter {
|
||||||
FieldInfo fieldInfo,
|
FieldInfo fieldInfo,
|
||||||
FloatVectorValues floatVectorValues,
|
FloatVectorValues floatVectorValues,
|
||||||
IndexOutput centroidOutput,
|
IndexOutput centroidOutput,
|
||||||
InfoStream infoStream,
|
|
||||||
float[] globalCentroid
|
float[] globalCentroid
|
||||||
) throws IOException;
|
) throws IOException;
|
||||||
|
|
||||||
|
@ -143,7 +141,6 @@ public abstract class IVFVectorsWriter extends KnnVectorsWriter {
|
||||||
CentroidSupplier centroidSupplier,
|
CentroidSupplier centroidSupplier,
|
||||||
FloatVectorValues floatVectorValues,
|
FloatVectorValues floatVectorValues,
|
||||||
IndexOutput postingsOutput,
|
IndexOutput postingsOutput,
|
||||||
InfoStream infoStream,
|
|
||||||
IntArrayList[] assignmentsByCluster
|
IntArrayList[] assignmentsByCluster
|
||||||
) throws IOException;
|
) throws IOException;
|
||||||
|
|
||||||
|
@ -168,7 +165,6 @@ public abstract class IVFVectorsWriter extends KnnVectorsWriter {
|
||||||
fieldWriter.fieldInfo,
|
fieldWriter.fieldInfo,
|
||||||
floatVectorValues,
|
floatVectorValues,
|
||||||
ivfCentroids,
|
ivfCentroids,
|
||||||
segmentWriteState.infoStream,
|
|
||||||
globalCentroid
|
globalCentroid
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -180,7 +176,6 @@ public abstract class IVFVectorsWriter extends KnnVectorsWriter {
|
||||||
centroidSupplier,
|
centroidSupplier,
|
||||||
floatVectorValues,
|
floatVectorValues,
|
||||||
ivfClusters,
|
ivfClusters,
|
||||||
segmentWriteState.infoStream,
|
|
||||||
centroidAssignments.assignmentsByCluster()
|
centroidAssignments.assignmentsByCluster()
|
||||||
);
|
);
|
||||||
// write posting lists
|
// write posting lists
|
||||||
|
@ -313,7 +308,6 @@ public abstract class IVFVectorsWriter extends KnnVectorsWriter {
|
||||||
centroidSupplier,
|
centroidSupplier,
|
||||||
floatVectorValues,
|
floatVectorValues,
|
||||||
ivfClusters,
|
ivfClusters,
|
||||||
mergeState.infoStream,
|
|
||||||
centroidAssignments.assignmentsByCluster()
|
centroidAssignments.assignmentsByCluster()
|
||||||
);
|
);
|
||||||
assert offsets.length == centroidSupplier.size();
|
assert offsets.length == centroidSupplier.size();
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue