diff --git a/qa/vector/build.gradle b/qa/vector/build.gradle index 52b2eeae8226..533b14363bee 100644 --- a/qa/vector/build.gradle +++ b/qa/vector/build.gradle @@ -7,6 +7,8 @@ * License v3.0 only", or the "Server Side Public License, v 1". */ +import org.elasticsearch.gradle.internal.test.TestUtil + apply plugin: 'elasticsearch.java' apply plugin: 'elasticsearch.build' @@ -23,6 +25,8 @@ dependencies { api "org.apache.lucene:lucene-core:${versions.lucene}" api "org.apache.lucene:lucene-queries:${versions.lucene}" api "org.apache.lucene:lucene-codecs:${versions.lucene}" + implementation project(':libs:simdvec') + implementation project(':libs:native') implementation project(':libs:logging') implementation project(':server') } @@ -37,6 +41,7 @@ tasks.register("checkVec", JavaExec) { // Configure logging to console systemProperty "es.logger.out", "console" systemProperty "es.logger.level", "INFO" // Change to DEBUG if needed + systemProperty 'es.nativelibs.path', TestUtil.getTestLibraryPath(file("../../libs/native/libraries/build/platform/").toString()) if (buildParams.getRuntimeJavaVersion().map { it.majorVersion.toInteger() }.get() >= 21) { jvmArgs '-Xms4g', '-Xmx4g', '--add-modules=jdk.incubator.vector', '--enable-native-access=ALL-UNNAMED', '-Djava.util.concurrent.ForkJoinPool.common.parallelism=8', '-XX:+UnlockDiagnosticVMOptions', '-XX:+DebugNonSafepoints', '-XX:+HeapDumpOnOutOfMemoryError' diff --git a/qa/vector/src/main/java/module-info.java b/qa/vector/src/main/java/module-info.java index 9b9f58555968..b6647aafeb01 100644 --- a/qa/vector/src/main/java/module-info.java +++ b/qa/vector/src/main/java/module-info.java @@ -11,6 +11,7 @@ module org.elasticsearch.test.knn { requires org.elasticsearch.base; requires org.elasticsearch.server; requires org.elasticsearch.xcontent; + requires org.elasticsearch.cli; requires org.apache.lucene.core; requires org.apache.lucene.codecs; requires org.apache.lucene.queries; diff --git a/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java b/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java index d3f707d191bc..dcce1fd304b0 100644 --- a/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java +++ b/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java @@ -15,8 +15,10 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.lucene101.Lucene101Codec; import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat; +import org.elasticsearch.cli.ProcessInfo; import org.elasticsearch.common.Strings; import org.elasticsearch.common.logging.LogConfigurator; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.PathUtils; import org.elasticsearch.index.codec.vectors.ES813Int8FlatVectorFormat; import org.elasticsearch.index.codec.vectors.ES814HnswScalarQuantizedVectorsFormat; @@ -24,6 +26,8 @@ import org.elasticsearch.index.codec.vectors.IVFVectorsFormat; import org.elasticsearch.index.codec.vectors.es818.ES818BinaryQuantizedVectorsFormat; import org.elasticsearch.index.codec.vectors.es818.ES818HnswBinaryQuantizedVectorsFormat; import org.elasticsearch.logging.Level; +import org.elasticsearch.logging.LogManager; +import org.elasticsearch.logging.Logger; import org.elasticsearch.xcontent.XContentParser; import org.elasticsearch.xcontent.XContentParserConfiguration; import org.elasticsearch.xcontent.XContentType; @@ -35,19 +39,26 @@ import java.nio.file.Path; import java.util.ArrayList; import java.util.List; import java.util.Locale; +import java.util.Map; /** * A utility class to create and test KNN indices using Lucene. * It supports various index types (HNSW, FLAT, IVF) and configurations. */ public class KnnIndexTester { - static final Level LOG_LEVEL = Level.DEBUG; - - static final SysOutLogger logger = new SysOutLogger(); + static final Logger logger; static { LogConfigurator.loadLog4jPlugins(); - LogConfigurator.configureESLogging(); // native access requires logging to be initialized + + // necessary otherwise the es.logger.level system configuration in build.gradle is ignored + ProcessInfo pinfo = ProcessInfo.fromSystem(); + Map sysprops = pinfo.sysprops(); + String loggerLevel = sysprops.getOrDefault("es.logger.level", Level.INFO.name()); + Settings settings = Settings.builder().put("logger.level", loggerLevel).build(); + LogConfigurator.configureWithoutConfig(settings); + + logger = LogManager.getLogger(KnnIndexTester.class); } static final String INDEX_DIR = "target/knn_index"; @@ -163,7 +174,7 @@ public class KnnIndexTester { FormattedResults formattedResults = new FormattedResults(); for (CmdLineArgs cmdLineArgs : cmdLineArgsList) { Results result = new Results(cmdLineArgs.indexType().name().toLowerCase(Locale.ROOT), cmdLineArgs.numDocs()); - System.out.println("Running KNN index tester with arguments: " + cmdLineArgs); + logger.info("Running KNN index tester with arguments: " + cmdLineArgs); Codec codec = createCodec(cmdLineArgs); Path indexPath = PathUtils.get(formatIndexPath(cmdLineArgs)); if (cmdLineArgs.reindex() || cmdLineArgs.forceMerge()) { @@ -195,8 +206,7 @@ public class KnnIndexTester { } formattedResults.results.add(result); } - System.out.println("Results:"); - System.out.println(formattedResults); + logger.info("Results: \n" + formattedResults); } static class FormattedResults { @@ -326,57 +336,6 @@ public class KnnIndexTester { } } - static final class SysOutLogger { - - void warn(String message) { - if (LOG_LEVEL.ordinal() >= Level.WARN.ordinal()) { - System.out.println(message); - } - } - - void warn(String message, Object... params) { - if (LOG_LEVEL.ordinal() >= Level.WARN.ordinal()) { - System.out.println(String.format(Locale.ROOT, message, params)); - } - } - - void info(String message) { - if (LOG_LEVEL.ordinal() >= Level.INFO.ordinal()) { - System.out.println(message); - } - } - - void info(String message, Object... params) { - if (LOG_LEVEL.ordinal() >= Level.INFO.ordinal()) { - System.out.println(String.format(Locale.ROOT, message, params)); - } - } - - void debug(String message) { - if (LOG_LEVEL.ordinal() >= Level.DEBUG.ordinal()) { - System.out.println(message); - } - } - - void debug(String message, Object... params) { - if (LOG_LEVEL.ordinal() >= Level.DEBUG.ordinal()) { - System.out.println(String.format(Locale.ROOT, message, params)); - } - } - - void trace(String message) { - if (LOG_LEVEL == Level.TRACE) { - System.out.println(message); - } - } - - void trace(String message, Object... params) { - if (LOG_LEVEL == Level.TRACE) { - System.out.println(String.format(Locale.ROOT, message, params)); - } - } - } - static final class ThreadDetails { private static final ThreadMXBean threadBean = (ThreadMXBean) java.lang.management.ManagementFactory.getThreadMXBean(); public final long[] threadIDs; diff --git a/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexer.java b/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexer.java index b6fc2ebd8b00..f0c1648ae52d 100644 --- a/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexer.java +++ b/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexer.java @@ -117,7 +117,7 @@ class KnnIndexer { } }); logger.debug( - "KnnIndexer: using codec=%s, vectorEncoding=%s, dim=%d, similarityFunction=%s", + "KnnIndexer: using codec={}, vectorEncoding={}, dim={}, similarityFunction={}", codec.getName(), vectorEncoding, dim, @@ -125,7 +125,7 @@ class KnnIndexer { ); if (Files.exists(indexPath)) { - logger.debug("KnnIndexer: existing index at %s", indexPath); + logger.debug("KnnIndexer: existing index at {}", indexPath); } else { Files.createDirectories(indexPath); } @@ -143,7 +143,7 @@ class KnnIndexer { ); } logger.info( - "docsPathSizeInBytes=%d, dim=%d, vectorEncoding=%s, byteSize=%d", + "docsPathSizeInBytes={}, dim={}, vectorEncoding={}, byteSize={}", docsPathSizeInBytes, dim, vectorEncoding, @@ -170,7 +170,7 @@ class KnnIndexer { } long elapsed = System.nanoTime() - start; - logger.debug("Indexing took %d ms for %d docs", TimeUnit.NANOSECONDS.toMillis(elapsed), numDocs); + logger.debug("Indexing took {} ms for {} docs", TimeUnit.NANOSECONDS.toMillis(elapsed), numDocs); result.indexTimeMS = TimeUnit.NANOSECONDS.toMillis(elapsed); } @@ -183,14 +183,14 @@ class KnnIndexer { } }); iwc.setCodec(codec); - logger.debug("KnnIndexer: forceMerge in %s", indexPath); + logger.debug("KnnIndexer: forceMerge in {}", indexPath); long startNS = System.nanoTime(); try (IndexWriter iw = new IndexWriter(FSDirectory.open(indexPath), iwc)) { iw.forceMerge(1); } long endNS = System.nanoTime(); long elapsedNSec = (endNS - startNS); - logger.info("forceMerge took %d ms", TimeUnit.NANOSECONDS.toMillis(elapsedNSec)); + logger.info("forceMerge took {} ms", TimeUnit.NANOSECONDS.toMillis(elapsedNSec)); results.forceMergeTimeMS = TimeUnit.NANOSECONDS.toMillis(elapsedNSec); } diff --git a/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnSearcher.java b/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnSearcher.java index 5a951774ce22..ec90dd46ef5b 100644 --- a/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnSearcher.java +++ b/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnSearcher.java @@ -181,7 +181,7 @@ class KnnSearcher { resultIds[i] = getResultIds(results[i], storedFields); } logger.info( - "completed %d searches in %d ms: %d QPS CPU time=%dms", + "completed {} searches in {} ms: {} QPS CPU time={}ms", numQueryVectors, elapsed, (1000L * numQueryVectors) / elapsed, diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java index 2527a91074e7..cac8cc7cd248 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java @@ -17,11 +17,12 @@ import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.internal.hppc.IntArrayList; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.util.InfoStream; import org.apache.lucene.util.VectorUtil; import org.apache.lucene.util.quantization.OptimizedScalarQuantizer; import org.elasticsearch.index.codec.vectors.cluster.HierarchicalKMeans; import org.elasticsearch.index.codec.vectors.cluster.KMeansResult; +import org.elasticsearch.logging.LogManager; +import org.elasticsearch.logging.Logger; import org.elasticsearch.simdvec.ES91OSQVectorsScorer; import java.io.IOException; @@ -31,7 +32,6 @@ import java.nio.ByteOrder; import static org.apache.lucene.codecs.lucene102.Lucene102BinaryQuantizedVectorsFormat.INDEX_BITS; import static org.apache.lucene.util.quantization.OptimizedScalarQuantizer.discretize; import static org.apache.lucene.util.quantization.OptimizedScalarQuantizer.packAsBinary; -import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.IVF_VECTOR_COMPONENT; /** * Default implementation of {@link IVFVectorsWriter}. It uses {@link HierarchicalKMeans} algorithm to @@ -39,6 +39,7 @@ import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.IVF_VECTOR_ * fashion. */ public class DefaultIVFVectorsWriter extends IVFVectorsWriter { + private static final Logger logger = LogManager.getLogger(DefaultIVFVectorsWriter.class); private final int vectorPerCluster; @@ -53,7 +54,6 @@ public class DefaultIVFVectorsWriter extends IVFVectorsWriter { CentroidSupplier centroidSupplier, FloatVectorValues floatVectorValues, IndexOutput postingsOutput, - InfoStream infoStream, IntArrayList[] assignmentsByCluster ) throws IOException { // write the posting lists @@ -79,14 +79,14 @@ public class DefaultIVFVectorsWriter extends IVFVectorsWriter { writePostingList(cluster, postingsOutput, binarizedByteVectorValues); } - if (infoStream.isEnabled(IVF_VECTOR_COMPONENT)) { - printClusterQualityStatistics(assignmentsByCluster, infoStream); + if (logger.isDebugEnabled()) { + printClusterQualityStatistics(assignmentsByCluster); } return offsets; } - private static void printClusterQualityStatistics(IntArrayList[] clusters, InfoStream infoStream) { + private static void printClusterQualityStatistics(IntArrayList[] clusters) { float min = Float.MAX_VALUE; float max = Float.MIN_VALUE; float mean = 0; @@ -105,20 +105,14 @@ public class DefaultIVFVectorsWriter extends IVFVectorsWriter { max = Math.max(max, cluster.size()); } float variance = m2 / (clusters.length - 1); - infoStream.message( - IVF_VECTOR_COMPONENT, - "Centroid count: " - + clusters.length - + " min: " - + min - + " max: " - + max - + " mean: " - + mean - + " stdDev: " - + Math.sqrt(variance) - + " variance: " - + variance + logger.debug( + "Centroid count: {} min: {} max: {} mean: {} stdDev: {} variance: {}", + clusters.length, + min, + max, + mean, + Math.sqrt(variance), + variance ); } @@ -208,17 +202,16 @@ public class DefaultIVFVectorsWriter extends IVFVectorsWriter { float[] globalCentroid ) throws IOException { // TODO: take advantage of prior generated clusters from mergeState in the future - return calculateAndWriteCentroids(fieldInfo, floatVectorValues, centroidOutput, mergeState.infoStream, globalCentroid, false); + return calculateAndWriteCentroids(fieldInfo, floatVectorValues, centroidOutput, globalCentroid, false); } CentroidAssignments calculateAndWriteCentroids( FieldInfo fieldInfo, FloatVectorValues floatVectorValues, IndexOutput centroidOutput, - InfoStream infoStream, float[] globalCentroid ) throws IOException { - return calculateAndWriteCentroids(fieldInfo, floatVectorValues, centroidOutput, infoStream, globalCentroid, true); + return calculateAndWriteCentroids(fieldInfo, floatVectorValues, centroidOutput, globalCentroid, true); } /** @@ -228,7 +221,6 @@ public class DefaultIVFVectorsWriter extends IVFVectorsWriter { * @param fieldInfo merging field info * @param floatVectorValues the float vector values to merge * @param centroidOutput the centroid output - * @param infoStream the merge state * @param globalCentroid the global centroid, calculated by this method and used to quantize the centroids * @param cacheCentroids whether the centroids are kept or discarded once computed * @return the vector assignments, soar assignments, and if asked the centroids themselves that were computed @@ -238,7 +230,6 @@ public class DefaultIVFVectorsWriter extends IVFVectorsWriter { FieldInfo fieldInfo, FloatVectorValues floatVectorValues, IndexOutput centroidOutput, - InfoStream infoStream, float[] globalCentroid, boolean cacheCentroids ) throws IOException { @@ -266,12 +257,9 @@ public class DefaultIVFVectorsWriter extends IVFVectorsWriter { // write centroids writeCentroids(centroids, fieldInfo, globalCentroid, centroidOutput); - if (infoStream.isEnabled(IVF_VECTOR_COMPONENT)) { - infoStream.message( - IVF_VECTOR_COMPONENT, - "calculate centroids and assign vectors time ms: " + ((System.nanoTime() - nanoTime) / 1000000.0) - ); - infoStream.message(IVF_VECTOR_COMPONENT, "final centroid count: " + centroids.length); + if (logger.isDebugEnabled()) { + logger.debug("calculate centroids and assign vectors time ms: {}", (System.nanoTime() - nanoTime) / 1000000.0); + logger.debug("final centroid count: {}", centroids.length); } IntArrayList[] assignmentsByCluster = new IntArrayList[centroids.length]; diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java index 84981c3d2a1b..3dde6fab00d4 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java @@ -242,8 +242,8 @@ public class ES814ScalarQuantizedVectorsFormat extends FlatVectorsFormat { final FlatVectorsScorer delegate; final VectorScorerFactory factory; - ESFlatVectorsScorer(FlatVectorsScorer delegte) { - this.delegate = delegte; + ESFlatVectorsScorer(FlatVectorsScorer delegate) { + this.delegate = delegate; factory = VectorScorerFactory.instance().orElse(null); } diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsFormat.java index 03e4a38b169d..d2495dfc89ac 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsFormat.java @@ -45,7 +45,6 @@ import java.io.IOException; */ public class IVFVectorsFormat extends KnnVectorsFormat { - public static final String IVF_VECTOR_COMPONENT = "IVF"; public static final String NAME = "IVFVectorsFormat"; // centroid ordinals -> centroid values, offsets public static final String CENTROID_EXTENSION = "cenivf"; diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsWriter.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsWriter.java index 73a0bdd5efa4..a29c21a15870 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsWriter.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsWriter.java @@ -28,7 +28,6 @@ import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.util.InfoStream; import org.apache.lucene.util.VectorUtil; import org.elasticsearch.core.IOUtils; import org.elasticsearch.core.SuppressForbidden; @@ -134,7 +133,6 @@ public abstract class IVFVectorsWriter extends KnnVectorsWriter { FieldInfo fieldInfo, FloatVectorValues floatVectorValues, IndexOutput centroidOutput, - InfoStream infoStream, float[] globalCentroid ) throws IOException; @@ -143,7 +141,6 @@ public abstract class IVFVectorsWriter extends KnnVectorsWriter { CentroidSupplier centroidSupplier, FloatVectorValues floatVectorValues, IndexOutput postingsOutput, - InfoStream infoStream, IntArrayList[] assignmentsByCluster ) throws IOException; @@ -168,7 +165,6 @@ public abstract class IVFVectorsWriter extends KnnVectorsWriter { fieldWriter.fieldInfo, floatVectorValues, ivfCentroids, - segmentWriteState.infoStream, globalCentroid ); @@ -180,7 +176,6 @@ public abstract class IVFVectorsWriter extends KnnVectorsWriter { centroidSupplier, floatVectorValues, ivfClusters, - segmentWriteState.infoStream, centroidAssignments.assignmentsByCluster() ); // write posting lists @@ -313,7 +308,6 @@ public abstract class IVFVectorsWriter extends KnnVectorsWriter { centroidSupplier, floatVectorValues, ivfClusters, - mergeState.infoStream, centroidAssignments.assignmentsByCluster() ); assert offsets.length == centroidSupplier.size();