Upgrade to Lucene 10.2.0 (#126594)

This commit upgrades Elasticsearch to Lucene 10.2.0.
This commit is contained in:
Ignacio Vera 2025-04-14 13:50:52 +02:00 committed by GitHub
parent 58a29396a5
commit ffdfcec334
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
20 changed files with 509 additions and 356 deletions

View file

@ -19,6 +19,7 @@ import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.util.hnsw.RandomVectorScorer;
import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier;
import org.apache.lucene.util.hnsw.UpdateableRandomVectorScorer;
import org.apache.lucene.util.quantization.QuantizedByteVectorValues;
import org.apache.lucene.util.quantization.ScalarQuantizer;
import org.elasticsearch.common.logging.LogConfigurator;
@ -76,10 +77,10 @@ public class VectorScorerBenchmark {
float vec2Offset;
float scoreCorrectionConstant;
RandomVectorScorer luceneDotScorer;
RandomVectorScorer luceneSqrScorer;
RandomVectorScorer nativeDotScorer;
RandomVectorScorer nativeSqrScorer;
UpdateableRandomVectorScorer luceneDotScorer;
UpdateableRandomVectorScorer luceneSqrScorer;
UpdateableRandomVectorScorer nativeDotScorer;
UpdateableRandomVectorScorer nativeSqrScorer;
RandomVectorScorer luceneDotScorerQuery;
RandomVectorScorer nativeDotScorerQuery;
@ -118,12 +119,16 @@ public class VectorScorerBenchmark {
in = dir.openInput("vector.data", IOContext.DEFAULT);
var values = vectorValues(dims, 2, in, VectorSimilarityFunction.DOT_PRODUCT);
scoreCorrectionConstant = values.getScalarQuantizer().getConstantMultiplier();
luceneDotScorer = luceneScoreSupplier(values, VectorSimilarityFunction.DOT_PRODUCT).scorer(0);
luceneDotScorer = luceneScoreSupplier(values, VectorSimilarityFunction.DOT_PRODUCT).scorer();
luceneDotScorer.setScoringOrdinal(0);
values = vectorValues(dims, 2, in, VectorSimilarityFunction.EUCLIDEAN);
luceneSqrScorer = luceneScoreSupplier(values, VectorSimilarityFunction.EUCLIDEAN).scorer(0);
luceneSqrScorer = luceneScoreSupplier(values, VectorSimilarityFunction.EUCLIDEAN).scorer();
luceneSqrScorer.setScoringOrdinal(0);
nativeDotScorer = factory.getInt7SQVectorScorerSupplier(DOT_PRODUCT, in, values, scoreCorrectionConstant).get().scorer(0);
nativeSqrScorer = factory.getInt7SQVectorScorerSupplier(EUCLIDEAN, in, values, scoreCorrectionConstant).get().scorer(0);
nativeDotScorer = factory.getInt7SQVectorScorerSupplier(DOT_PRODUCT, in, values, scoreCorrectionConstant).get().scorer();
nativeDotScorer.setScoringOrdinal(0);
nativeSqrScorer = factory.getInt7SQVectorScorerSupplier(EUCLIDEAN, in, values, scoreCorrectionConstant).get().scorer();
nativeSqrScorer.setScoringOrdinal(0);
// setup for getInt7SQVectorScorer / query vector scoring
float[] queryVec = new float[dims];

View file

@ -1,5 +1,5 @@
elasticsearch = 9.1.0
lucene = 10.1.0
lucene = 10.2.0
bundled_jdk_vendor = openjdk
bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90
@ -8,7 +8,7 @@ spatial4j = 0.7
jts = 1.15.0
jackson = 2.15.0
snakeyaml = 2.0
icu4j = 68.2
icu4j = 77.1
supercsv = 2.4.0
log4j = 2.19.0
slf4j = 2.0.6

View file

@ -1,8 +1,8 @@
include::{docs-root}/shared/versions/stack/{source_branch}.asciidoc[]
:lucene_version: 10.1.0
:lucene_version_path: 10_1_0
:lucene_version: 10.2.0
:lucene_version_path: 10_2_0
:jdk: 11.0.2
:jdk_major: 11
:build_type: tar

View file

@ -0,0 +1,5 @@
pr: 126594
summary: Upgrade to Lucene 10.2.0
area: Search
type: upgrade
issues: []

View file

@ -911,21 +911,16 @@
<sha256 value="37f5216e14af2772930dff9b8734353f0a80e89ba3f33e065441de6537c5e842" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="com.ibm.icu" name="icu4j" version="68.2">
<artifact name="icu4j-68.2.jar">
<sha256 value="9bd7bf869a44ba8aeb0cddd7e6616e88cd4795ba5bfce2230447cb0e185a646c" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="com.ibm.icu" name="icu4j" version="71.1">
<artifact name="icu4j-71.1.jar">
<sha256 value="91c4f8ebf0ceb489547098fe9d5c09a65eb419caea6ed714867f5280800bcf1a" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="com.ibm.icu" name="icu4j" version="72.1">
<artifact name="icu4j-72.1.jar">
<sha256 value="3df572b240a68d13b5cd778ad2393e885d26411434cd8f098ac5987ea2e64ce3" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="com.ibm.icu" name="icu4j" version="77.1">
<artifact name="icu4j-77.1.jar">
<sha256 value="b3640b9f416a4411fd33c59abbeea8fd57d024c23e1819bf9673220a97499fe3" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="com.jamesmurty.utils" name="java-xmlbuilder" version="0.4">
<artifact name="java-xmlbuilder-0.4.jar">
<sha256 value="681e53c4ffd59fa12068803b259e3a83d43f07a47c112e748a187dee179eb31f" origin="Generated by Gradle"/>
@ -2966,179 +2961,129 @@
<sha256 value="015d5c229f3cd5c0ebf175c1da08d596d94043362ae9d92637d88848c90537c8" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="org.apache.lucene" name="lucene-analysis-common" version="10.1.0">
<artifact name="lucene-analysis-common-10.1.0.jar">
<sha256 value="31556c9a364c345677c5651532de4341cbf92663725a977df002e83adba2e91d" origin="Generated by Gradle">
<also-trust value="6e3730816710f1caa50342034c18fbd705a5f3ac88909279fa1f7c5e1e04405a"/>
</sha256>
<component group="org.apache.lucene" name="lucene-analysis-common" version="10.2.0">
<artifact name="lucene-analysis-common-10.2.0.jar">
<sha256 value="11dae94401d520fade4cf1a97773e1a83b0c6434243c504670f2cbe33cbbc11d" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="org.apache.lucene" name="lucene-analysis-icu" version="10.1.0">
<artifact name="lucene-analysis-icu-10.1.0.jar">
<sha256 value="8fbb8f1506daf91ab9d6b8f504036cedd82eff8dba2e6a0124806a86f6776c45" origin="Generated by Gradle">
<also-trust value="a5702e4a8608e5ade6338103d8f3b264887e955b5429944acbdb9eb58f1aacdf"/>
</sha256>
<component group="org.apache.lucene" name="lucene-analysis-icu" version="10.2.0">
<artifact name="lucene-analysis-icu-10.2.0.jar">
<sha256 value="418ebd80950e43ff3cab04545f4356a1a6f6487356d5939a7105830001427c77" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="org.apache.lucene" name="lucene-analysis-kuromoji" version="10.1.0">
<artifact name="lucene-analysis-kuromoji-10.1.0.jar">
<sha256 value="7f0123efbe713a9f1fa27df64a4af884719057975eb0676667e327aac80a6add" origin="Generated by Gradle">
<also-trust value="b4b57c41e96f3c5ac528d9ead6e10d6f9888d0f03259275a5c68be6faa320745"/>
</sha256>
<component group="org.apache.lucene" name="lucene-analysis-kuromoji" version="10.2.0">
<artifact name="lucene-analysis-kuromoji-10.2.0.jar">
<sha256 value="c47a368fffac76020e310fe7e45207ab3c9d54967f7a035d2f6453fd4f98702d" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="org.apache.lucene" name="lucene-analysis-morfologik" version="10.1.0">
<artifact name="lucene-analysis-morfologik-10.1.0.jar">
<sha256 value="9f20bcb2ed691b375c40a15752e9f09906c4dac1d36a309472fd6705b3515639" origin="Generated by Gradle">
<also-trust value="96eed0039370bd402f5c73a611bdfcbce38d7a6b012bb99f8ae62099d8925f9b"/>
</sha256>
<component group="org.apache.lucene" name="lucene-analysis-morfologik" version="10.2.0">
<artifact name="lucene-analysis-morfologik-10.2.0.jar">
<sha256 value="dd3153a94d43c3cdeea235a1edc9becb11ea74f5dbaf4a53005f3b6a77ac731d" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="org.apache.lucene" name="lucene-analysis-nori" version="10.1.0">
<artifact name="lucene-analysis-nori-10.1.0.jar">
<sha256 value="c5779e7d987166fc926d17d9b3ded95fbbbfc08f2f0a08ab1ed1675cb8c48649" origin="Generated by Gradle">
<also-trust value="1e199f1fac7f839dd8946ea231b1d072ef985da2267b0f50648f2b023bc76d90"/>
</sha256>
<component group="org.apache.lucene" name="lucene-analysis-nori" version="10.2.0">
<artifact name="lucene-analysis-nori-10.2.0.jar">
<sha256 value="48e0cf2452be7538655dbd2f0d67a9453c5fbac515aaf5ceeb8e854045a2684b" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="org.apache.lucene" name="lucene-analysis-phonetic" version="10.1.0">
<artifact name="lucene-analysis-phonetic-10.1.0.jar">
<sha256 value="a5481e024ee7a7b240e880e6e8fc1af15a65ab835d8286d7daf4a34c6d5cad80" origin="Generated by Gradle">
<also-trust value="01cbba2cae0d2ec099a86124dd09dd4bea15eacb5e73c6f90caa2d02b5aa15e9"/>
</sha256>
<component group="org.apache.lucene" name="lucene-analysis-phonetic" version="10.2.0">
<artifact name="lucene-analysis-phonetic-10.2.0.jar">
<sha256 value="993afea6401c89642345b23ab9eca70cdae9a1c038f1b6b7f1c64138392f695c" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="org.apache.lucene" name="lucene-analysis-smartcn" version="10.1.0">
<artifact name="lucene-analysis-smartcn-10.1.0.jar">
<sha256 value="bc46f95e6ba5dc95f6d546d9dea60ec9938a660617a49cd59dacd2a7abd5a52b" origin="Generated by Gradle">
<also-trust value="14411b9b5f4592d7f62c13fb0f97c07100cfc5e58e2b6ea66fe213fcf5e7945b"/>
</sha256>
<component group="org.apache.lucene" name="lucene-analysis-smartcn" version="10.2.0">
<artifact name="lucene-analysis-smartcn-10.2.0.jar">
<sha256 value="5ed3d5d3310612dda2fb6ce26eb601d690931b2a5f56a702def5b3b998aa582c" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="org.apache.lucene" name="lucene-analysis-stempel" version="10.1.0">
<artifact name="lucene-analysis-stempel-10.1.0.jar">
<sha256 value="ffd2571c296da3e82b06124ac43896d3828490256038f3243d26b1baefb2d451" origin="Generated by Gradle">
<also-trust value="03601385dbd511c67145e5923976942dc692d44ef3e8c385b78947de445b002c"/>
</sha256>
<component group="org.apache.lucene" name="lucene-analysis-stempel" version="10.2.0">
<artifact name="lucene-analysis-stempel-10.2.0.jar">
<sha256 value="908528bcc03b87d4ee314f6099f3333e3f1cd4a139e793e2e1ed554164341906" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="org.apache.lucene" name="lucene-backward-codecs" version="10.1.0">
<artifact name="lucene-backward-codecs-10.1.0.jar">
<sha256 value="1c5aa8086d13a589dee4bbf79bd2cda6571fccd6fed6b7a56847b1bcc20231e5" origin="Generated by Gradle">
<also-trust value="4c5729631bfda4561277f493076c085889ecbf91c9e7e058fc3bbd658e78b3e8"/>
</sha256>
<component group="org.apache.lucene" name="lucene-backward-codecs" version="10.2.0">
<artifact name="lucene-backward-codecs-10.2.0.jar">
<sha256 value="5f91c3eba54cf25d990e9c6a3cd13185521ca61c546eb5603f0c258627d0dfc6" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="org.apache.lucene" name="lucene-codecs" version="10.1.0">
<artifact name="lucene-codecs-10.1.0.jar">
<sha256 value="5f48b0a529482b5007d60cb4bbfeb5dcc24f37bae9ae783f8998def63866e4a6" origin="Generated by Gradle">
<also-trust value="cee89f831ba1fb8af6f24aa1dd2018020daa585b48c7d7ab02f8609cdc5bdb59"/>
</sha256>
<component group="org.apache.lucene" name="lucene-codecs" version="10.2.0">
<artifact name="lucene-codecs-10.2.0.jar">
<sha256 value="48aed5ccc90d3173a5484e073050e9396bb7333591e35c23188da48883bed071" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="org.apache.lucene" name="lucene-core" version="10.1.0">
<artifact name="lucene-core-10.1.0.jar">
<sha256 value="c9bb8d28327b21f31c7441bba1c96978231f877622c8a5a449f14254af0050e7" origin="Generated by Gradle">
<also-trust value="dfe584dbe6808942667e6f1bd558b3ab1dbc27b318fa1c53fb242f3289a7adc5"/>
</sha256>
<component group="org.apache.lucene" name="lucene-core" version="10.2.0">
<artifact name="lucene-core-10.2.0.jar">
<sha256 value="15fc7bf52454c50e7107b068e2386b3cb2fdf024698bf7965eb3dd33e6813e76" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="org.apache.lucene" name="lucene-expressions" version="10.1.0">
<artifact name="lucene-expressions-10.1.0.jar">
<sha256 value="8f4d6a5750f80173b48dde86ba80a75cb06c200888144e2a81b53c61f194467e" origin="Generated by Gradle">
<also-trust value="ab44f9e274843d55650eaf24dbb40e1b2062e241d07fa64a6db610db3d5b7d74"/>
</sha256>
<component group="org.apache.lucene" name="lucene-expressions" version="10.2.0">
<artifact name="lucene-expressions-10.2.0.jar">
<sha256 value="f27ad8ae738adfaf685249ea84d597cb2f13fd7ba427c2b468afb14dfba11613" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="org.apache.lucene" name="lucene-facet" version="10.1.0">
<artifact name="lucene-facet-10.1.0.jar">
<sha256 value="6f8e4b58b858ec845f5bf02703e55bba7fdbc9c57d8c74d8e4f45437272fc9ea" origin="Generated by Gradle">
<also-trust value="f2c525ca5fb7341afd4a7ab346534bd5b201514365aeba5b62e908fb2e70acf0"/>
</sha256>
<component group="org.apache.lucene" name="lucene-facet" version="10.2.0">
<artifact name="lucene-facet-10.2.0.jar">
<sha256 value="4c822196f98576b52fd2fd6cf9b727c4066fca95c8c98074ff960f2a5f4178ee" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="org.apache.lucene" name="lucene-grouping" version="10.1.0">
<artifact name="lucene-grouping-10.1.0.jar">
<sha256 value="92620829184e82b45b9cdb53d28bdabd46a9fad8ee9bec683af10b9cff0b8815" origin="Generated by Gradle">
<also-trust value="cad8b601a9ce85999d34395b79b8e06b35942b11469e8ecee065be7f01e6d220"/>
</sha256>
<component group="org.apache.lucene" name="lucene-grouping" version="10.2.0">
<artifact name="lucene-grouping-10.2.0.jar">
<sha256 value="24dc2dff712371abc753453cb1b563edbaf94a2191f9471f2bc21c89deae21cf" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="org.apache.lucene" name="lucene-highlighter" version="10.1.0">
<artifact name="lucene-highlighter-10.1.0.jar">
<sha256 value="674c89120f533ba518043600b7ad2da665be649c76d7fdfd92f85d25060366b9" origin="Generated by Gradle">
<also-trust value="c2b4a2e2dff74a9f9cd740b7c800a89ecbac3ce203cb67e2ead620fa1b70e72d"/>
</sha256>
<component group="org.apache.lucene" name="lucene-highlighter" version="10.2.0">
<artifact name="lucene-highlighter-10.2.0.jar">
<sha256 value="68b867175d2ce9d9fb42256423d47fb4045163176f9e3c2614086657d2442a77" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="org.apache.lucene" name="lucene-join" version="10.1.0">
<artifact name="lucene-join-10.1.0.jar">
<sha256 value="6ab3faee6c953dd7f8c972610cefa0250d815bf99fad37e435474067c25316b9" origin="Generated by Gradle">
<also-trust value="7ee2ed9a811db5b9f440bc5fc4c0ecae63da753bdba03fb12316661aae8f2ed5"/>
</sha256>
<component group="org.apache.lucene" name="lucene-join" version="10.2.0">
<artifact name="lucene-join-10.2.0.jar">
<sha256 value="4307f925407a47938b5b284edd70ff9ea28d283ffdec23d7485db6cb6c48eecb" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="org.apache.lucene" name="lucene-memory" version="10.1.0">
<artifact name="lucene-memory-10.1.0.jar">
<sha256 value="2242750f139d41f9d311f806787f59308990161f8882218663322ae46d847427" origin="Generated by Gradle">
<also-trust value="810c208fda40863e3f692365acd96cdb0054eeefbb3bced8a77726cccb035291"/>
</sha256>
<component group="org.apache.lucene" name="lucene-memory" version="10.2.0">
<artifact name="lucene-memory-10.2.0.jar">
<sha256 value="49e00517f1e090169e686f4073efa387c3ad782d4d4a22b2f33f510328d2cb92" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="org.apache.lucene" name="lucene-misc" version="10.1.0">
<artifact name="lucene-misc-10.1.0.jar">
<sha256 value="58a3bed8b66fba748c02ea8a537544493a2864a7705b500bddcee611d208fd64" origin="Generated by Gradle">
<also-trust value="d1faab3d7542e8f22bd212109ed242dd72240ec02833ebe7bbf72e05e9ef37af"/>
</sha256>
<component group="org.apache.lucene" name="lucene-misc" version="10.2.0">
<artifact name="lucene-misc-10.2.0.jar">
<sha256 value="509c4a9ef26a8db257e8ea63eb77275c1fe43e8c33a8f19cc0393b1b58492ff0" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="org.apache.lucene" name="lucene-queries" version="10.1.0">
<artifact name="lucene-queries-10.1.0.jar">
<sha256 value="e009f809b585b736d09aacaf10a6455ba4a2a55f539569b348aee7c54602b67b" origin="Generated by Gradle">
<also-trust value="6200da82653f2cceee9bb93f7458b326172ed4fb74fe36d3d4d03c228320e1ce"/>
</sha256>
<component group="org.apache.lucene" name="lucene-queries" version="10.2.0">
<artifact name="lucene-queries-10.2.0.jar">
<sha256 value="7b5335b9db3bba42f4c3a7188bbaf7602d2cad2f84787d4e569b329ed38e9367" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="org.apache.lucene" name="lucene-queryparser" version="10.1.0">
<artifact name="lucene-queryparser-10.1.0.jar">
<sha256 value="7de6ddeeb901eccc8233427d705562392fca21ea4f88f3dc1667aacdbd37366c" origin="Generated by Gradle">
<also-trust value="84888c96bcbee568a8f2187d7c4f0ef296c42fb697d27164e2d95d35e7306bb9"/>
</sha256>
<component group="org.apache.lucene" name="lucene-queryparser" version="10.2.0">
<artifact name="lucene-queryparser-10.2.0.jar">
<sha256 value="43e58b4b89a399fb925a07343f787cdd1baa0353ddf7910d0053cd0aa2437d16" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="org.apache.lucene" name="lucene-sandbox" version="10.1.0">
<artifact name="lucene-sandbox-10.1.0.jar">
<sha256 value="3525794a2c601ef8cbec472b5b75ce6fabf78eee85bd289b0dfd2eae12d2d59c" origin="Generated by Gradle">
<also-trust value="d96aae12b1c3f399bbfd104599c46493b206cea54d6ff0389a64b19cd0d063ab"/>
</sha256>
<component group="org.apache.lucene" name="lucene-sandbox" version="10.2.0">
<artifact name="lucene-sandbox-10.2.0.jar">
<sha256 value="53ae14d5a088f0a731e18c30d8a1b4cbfe028a832cb5e07aa9889d94f389f380" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="org.apache.lucene" name="lucene-spatial-extras" version="10.1.0">
<artifact name="lucene-spatial-extras-10.1.0.jar">
<sha256 value="511108f060da0f8cffebc98e7f12e869f7985aaa72e4dd81cc62287559e16084" origin="Generated by Gradle">
<also-trust value="b316782f8b0f5d4bc8709a193d17ca8c67c89a468be258af4b47de393a11956e"/>
</sha256>
<component group="org.apache.lucene" name="lucene-spatial-extras" version="10.2.0">
<artifact name="lucene-spatial-extras-10.2.0.jar">
<sha256 value="f1e8d7970892de0ec56e8b58f5e3f9021f319755e87ad22ca9fa1a684cf38f2d" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="org.apache.lucene" name="lucene-spatial3d" version="10.1.0">
<artifact name="lucene-spatial3d-10.1.0.jar">
<sha256 value="8129fbdc884bf959267592bf0b6b2b2f7679740811d8f59e9f27106a615b5129" origin="Generated by Gradle">
<also-trust value="9f654dacef5d2ac262a023de5ecd7c4c37867de9dae48a709296f8c72b42dcf0"/>
</sha256>
<component group="org.apache.lucene" name="lucene-spatial3d" version="10.2.0">
<artifact name="lucene-spatial3d-10.2.0.jar">
<sha256 value="52592455a5c92518f0e73424ada943600700f9c187f15eae936aa1fd2c4b9c51" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="org.apache.lucene" name="lucene-suggest" version="10.1.0">
<artifact name="lucene-suggest-10.1.0.jar">
<sha256 value="bdad4d70408f26a66ee1fbaf79147188142e31b92aad1868bb2944980530691d" origin="Generated by Gradle">
<also-trust value="b0c6c8080f3bd4e1ba7c68cf6ed783749d719addef149fdcbad188a8590ab0b4"/>
</sha256>
<component group="org.apache.lucene" name="lucene-suggest" version="10.2.0">
<artifact name="lucene-suggest-10.2.0.jar">
<sha256 value="1fbcf35565bd7e9634795afd17deed8b51c6150d2b4165ca7cbfb76bf82b065d" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="org.apache.lucene" name="lucene-test-framework" version="10.1.0">
<artifact name="lucene-test-framework-10.1.0.jar">
<sha256 value="d778febde38dd14c1c4c898233d200724547d3d97f9926387842786b5d26fc73" origin="Generated by Gradle">
<also-trust value="9f68130df15d2a3c96918090d9ca747cd5dc4ab7b0928b19d7ab9097477d13b4"/>
</sha256>
<component group="org.apache.lucene" name="lucene-test-framework" version="10.2.0">
<artifact name="lucene-test-framework-10.2.0.jar">
<sha256 value="6ab50d8cb5976c6707610cfa0b6238922f2bbd66307e45f831b18564627d454f" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="org.apache.maven" name="maven-api-meta" version="4.0.0-alpha-9">

View file

@ -10,8 +10,8 @@
package org.elasticsearch.simdvec.internal;
import org.apache.lucene.store.MemorySegmentAccessInput;
import org.apache.lucene.util.hnsw.RandomVectorScorer;
import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier;
import org.apache.lucene.util.hnsw.UpdateableRandomVectorScorer;
import org.apache.lucene.util.quantization.QuantizedByteVectorValues;
import org.apache.lucene.util.quantization.ScalarQuantizedVectorSimilarity;
@ -55,9 +55,6 @@ public abstract sealed class Int7SQVectorScorerSupplier implements RandomVectorS
}
final float scoreFromOrds(int firstOrd, int secondOrd) throws IOException {
checkOrdinal(firstOrd);
checkOrdinal(secondOrd);
final int length = dims;
long firstByteOffset = (long) firstOrd * (length + Float.BYTES);
long secondByteOffset = (long) secondOrd * (length + Float.BYTES);
@ -92,13 +89,21 @@ public abstract sealed class Int7SQVectorScorerSupplier implements RandomVectorS
}
@Override
public RandomVectorScorer scorer(int ord) {
checkOrdinal(ord);
return new RandomVectorScorer.AbstractRandomVectorScorer(values) {
public UpdateableRandomVectorScorer scorer() {
return new UpdateableRandomVectorScorer.AbstractUpdateableRandomVectorScorer(values) {
private int ord = -1;
/**
 * Scores {@code node} against the ordinal currently held in {@code ord}, delegating to
 * {@code scoreFromOrds(ord, node)} after passing {@code node} through {@code checkOrdinal}.
 * NOTE(review): {@code ord} is initialised to -1; confirm callers always invoke
 * {@code setScoringOrdinal} first, since -1 would otherwise be handed straight to
 * {@code scoreFromOrds}.
 */
@Override
public float score(int node) throws IOException {
checkOrdinal(node);
return scoreFromOrds(ord, node);
}
/**
 * Stores {@code node} as the ordinal that later {@code score} calls pass as the first
 * argument to {@code scoreFromOrds}. The value goes through {@code checkOrdinal} before
 * it is stored.
 */
@Override
public void setScoringOrdinal(int node) throws IOException {
checkOrdinal(node);
this.ord = node;
}
};
}

View file

@ -19,8 +19,8 @@ import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.util.hnsw.RandomVectorScorer;
import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier;
import org.apache.lucene.util.hnsw.UpdateableRandomVectorScorer;
import org.apache.lucene.util.quantization.QuantizedByteVectorValues;
import org.apache.lucene.util.quantization.ScalarQuantizer;
@ -50,6 +50,8 @@ import static org.hamcrest.Matchers.greaterThanOrEqualTo;
// @com.carrotsearch.randomizedtesting.annotations.Repeat(iterations = 100)
public class VectorScorerFactoryTests extends AbstractVectorTestCase {
private static final float DELTA = 1e-4f;
// bounds of the range of values that can be seen by int7 scalar quantized vectors
static final byte MIN_INT7_VALUE = 0;
static final byte MAX_INT7_VALUE = 127;
@ -99,10 +101,13 @@ public class VectorScorerFactoryTests extends AbstractVectorTestCase {
float scc = values.getScalarQuantizer().getConstantMultiplier();
float expected = luceneScore(sim, vec1, vec2, scc, vec1Correction, vec2Correction);
var luceneSupplier = luceneScoreSupplier(values, VectorSimilarityType.of(sim)).scorer(0);
var luceneSupplier = luceneScoreSupplier(values, VectorSimilarityType.of(sim)).scorer();
luceneSupplier.setScoringOrdinal(0);
assertThat(luceneSupplier.score(1), equalTo(expected));
var supplier = factory.getInt7SQVectorScorerSupplier(sim, in, values, scc).get();
assertThat(supplier.scorer(0).score(1), equalTo(expected));
var scorer = supplier.scorer();
scorer.setScoringOrdinal(0);
assertThat(scorer.score(1), equalTo(expected));
if (Runtime.version().feature() >= 22) {
var qScorer = factory.getInt7SQVectorScorer(VectorSimilarityType.of(sim), values, query1).get();
@ -134,24 +139,32 @@ public class VectorScorerFactoryTests extends AbstractVectorTestCase {
float expected = 0f;
assertThat(luceneScore(DOT_PRODUCT, vec1, vec2, 1, -5, -5), equalTo(expected));
var supplier = factory.getInt7SQVectorScorerSupplier(DOT_PRODUCT, in, values, 1).get();
assertThat(supplier.scorer(0).score(1), equalTo(expected));
assertThat(supplier.scorer(0).score(1), greaterThanOrEqualTo(0f));
var scorer = supplier.scorer();
scorer.setScoringOrdinal(0);
assertThat(scorer.score(1), equalTo(expected));
assertThat(scorer.score(1), greaterThanOrEqualTo(0f));
// max inner product
expected = luceneScore(MAXIMUM_INNER_PRODUCT, vec1, vec2, 1, -5, -5);
supplier = factory.getInt7SQVectorScorerSupplier(MAXIMUM_INNER_PRODUCT, in, values, 1).get();
assertThat(supplier.scorer(0).score(1), greaterThanOrEqualTo(0f));
assertThat(supplier.scorer(0).score(1), equalTo(expected));
scorer = supplier.scorer();
scorer.setScoringOrdinal(0);
assertThat(scorer.score(1), greaterThanOrEqualTo(0f));
assertThat(scorer.score(1), equalTo(expected));
// cosine
expected = 0f;
assertThat(luceneScore(COSINE, vec1, vec2, 1, -5, -5), equalTo(expected));
supplier = factory.getInt7SQVectorScorerSupplier(COSINE, in, values, 1).get();
assertThat(supplier.scorer(0).score(1), equalTo(expected));
assertThat(supplier.scorer(0).score(1), greaterThanOrEqualTo(0f));
scorer = supplier.scorer();
scorer.setScoringOrdinal(0);
assertThat(scorer.score(1), equalTo(expected));
assertThat(scorer.score(1), greaterThanOrEqualTo(0f));
// euclidean
expected = luceneScore(EUCLIDEAN, vec1, vec2, 1, -5, -5);
supplier = factory.getInt7SQVectorScorerSupplier(EUCLIDEAN, in, values, 1).get();
assertThat(supplier.scorer(0).score(1), equalTo(expected));
assertThat(supplier.scorer(0).score(1), greaterThanOrEqualTo(0f));
scorer = supplier.scorer();
scorer.setScoringOrdinal(0);
assertThat(scorer.score(1), equalTo(expected));
assertThat(scorer.score(1), greaterThanOrEqualTo(0f));
}
}
}
@ -208,7 +221,9 @@ public class VectorScorerFactoryTests extends AbstractVectorTestCase {
var values = vectorValues(dims, size, in, VectorSimilarityType.of(sim));
float expected = luceneScore(sim, vectors[idx0], vectors[idx1], correction, offsets[idx0], offsets[idx1]);
var supplier = factory.getInt7SQVectorScorerSupplier(sim, in, values, correction).get();
assertThat(supplier.scorer(idx0).score(idx1), equalTo(expected));
var scorer = supplier.scorer();
scorer.setScoringOrdinal(idx0);
assertThat(scorer.score(idx1), equalTo(expected));
}
}
}
@ -265,7 +280,7 @@ public class VectorScorerFactoryTests extends AbstractVectorTestCase {
var expected = luceneScore(sim, qVectors[idx0], qVectors[idx1], correction, corrections[idx0], corrections[idx1]);
var scorer = factory.getInt7SQVectorScorer(VectorSimilarityType.of(sim), values, vectors[idx0]).get();
assertThat(scorer.score(idx1), equalTo(expected));
assertEquals(scorer.score(idx1), expected, DELTA);
}
}
}
@ -313,7 +328,9 @@ public class VectorScorerFactoryTests extends AbstractVectorTestCase {
var values = vectorValues(dims, size, in, VectorSimilarityType.of(sim));
float expected = luceneScore(sim, vectors[idx0], vectors[idx1], correction, offsets[idx0], offsets[idx1]);
var supplier = factory.getInt7SQVectorScorerSupplier(sim, in, values, correction).get();
assertThat(supplier.scorer(idx0).score(idx1), equalTo(expected));
var scorer = supplier.scorer();
scorer.setScoringOrdinal(idx0);
assertThat(scorer.score(idx1), equalTo(expected));
}
}
}
@ -352,7 +369,9 @@ public class VectorScorerFactoryTests extends AbstractVectorTestCase {
var values = vectorValues(dims, size, in, VectorSimilarityType.of(sim));
float expected = luceneScore(sim, vector(idx0, dims), vector(idx1, dims), correction, off0, off1);
var supplier = factory.getInt7SQVectorScorerSupplier(sim, in, values, correction).get();
assertThat(supplier.scorer(idx0).score(idx1), equalTo(expected));
var scorer = supplier.scorer();
scorer.setScoringOrdinal(idx0);
assertThat(scorer.score(idx1), equalTo(expected));
}
}
}
@ -391,8 +410,8 @@ public class VectorScorerFactoryTests extends AbstractVectorTestCase {
var values = vectorValues(dims, 4, in, VectorSimilarityType.of(sim));
var scoreSupplier = factory.getInt7SQVectorScorerSupplier(sim, in, values, 1f).get();
var tasks = List.<Callable<Optional<Throwable>>>of(
new ScoreCallable(scoreSupplier.copy().scorer(0), 1, expectedScore1),
new ScoreCallable(scoreSupplier.copy().scorer(2), 3, expectedScore2)
new ScoreCallable(scoreSupplier.copy().scorer(), 0, 1, expectedScore1),
new ScoreCallable(scoreSupplier.copy().scorer(), 2, 3, expectedScore2)
);
var executor = Executors.newFixedThreadPool(2);
var results = executor.invokeAll(tasks);
@ -408,14 +427,19 @@ public class VectorScorerFactoryTests extends AbstractVectorTestCase {
static class ScoreCallable implements Callable<Optional<Throwable>> {
final RandomVectorScorer scorer;
final UpdateableRandomVectorScorer scorer;
final int ord;
final float expectedScore;
ScoreCallable(RandomVectorScorer scorer, int ord, float expectedScore) {
this.scorer = scorer;
this.ord = ord;
this.expectedScore = expectedScore;
/**
 * Captures the scorer, the ordinal to score and the expected score, and immediately sets
 * {@code queryOrd} as the scorer's scoring ordinal.
 * An {@link IOException} raised by {@code setScoringOrdinal} is rethrown wrapped in a
 * {@link RuntimeException}, so the constructor declares no checked exception.
 */
ScoreCallable(UpdateableRandomVectorScorer scorer, int queryOrd, int ord, float expectedScore) {
try {
this.scorer = scorer;
this.scorer.setScoringOrdinal(queryOrd);
this.ord = ord;
this.expectedScore = expectedScore;
} catch (IOException e) {
throw new RuntimeException(e);
}
}
@Override

View file

@ -28,6 +28,8 @@ public abstract class AbstractCompoundWordTokenFilterFactory extends AbstractTok
protected final int maxSubwordSize;
protected final boolean onlyLongestMatch;
protected final CharArraySet wordList;
// TODO expose this parameter?
protected final boolean reuseChars;
protected AbstractCompoundWordTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(name);
@ -36,6 +38,8 @@ public abstract class AbstractCompoundWordTokenFilterFactory extends AbstractTok
minSubwordSize = settings.getAsInt("min_subword_size", CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
maxSubwordSize = settings.getAsInt("max_subword_size", CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
onlyLongestMatch = settings.getAsBoolean("only_longest_match", false);
// TODO is the default of true correct? see: https://github.com/apache/lucene/pull/14278
reuseChars = true;
wordList = Analysis.getWordSet(env, settings, "word_list");
if (wordList == null) {
throw new IllegalArgumentException("word_list must be provided for [" + name + "], either as a path to a file, or directly");

View file

@ -28,6 +28,14 @@ public class DictionaryCompoundWordTokenFilterFactory extends AbstractCompoundWo
@Override
public TokenStream create(TokenStream tokenStream) {
return new DictionaryCompoundWordTokenFilter(tokenStream, wordList, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
return new DictionaryCompoundWordTokenFilter(
tokenStream,
wordList,
minWordSize,
minSubwordSize,
maxSubwordSize,
onlyLongestMatch,
reuseChars
);
}
}

View file

@ -159,6 +159,7 @@ public class IndexVersions {
public static final IndexVersion SYNTHETIC_SOURCE_STORE_ARRAYS_NATIVELY_UNSIGNED_LONG = def(9_019_0_00, Version.LUCENE_10_1_0);
public static final IndexVersion SYNTHETIC_SOURCE_STORE_ARRAYS_NATIVELY_SCALED_FLOAT = def(9_020_0_00, Version.LUCENE_10_1_0);
public static final IndexVersion USE_LUCENE101_POSTINGS_FORMAT = def(9_021_0_00, Version.LUCENE_10_1_0);
// Index version paired with Version.LUCENE_10_2_0; digit grouping matches the neighbouring entries (same numeric id).
public static final IndexVersion UPGRADE_TO_LUCENE_10_2_0 = def(9_022_0_00, Version.LUCENE_10_2_0);
/*
* STOP! READ THIS FIRST! No, really,
* ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _

View file

@ -22,6 +22,7 @@ import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.util.VectorUtil;
import org.apache.lucene.util.hnsw.RandomVectorScorer;
import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier;
import org.apache.lucene.util.hnsw.UpdateableRandomVectorScorer;
import org.apache.lucene.util.quantization.QuantizedByteVectorValues;
import java.io.IOException;
@ -130,18 +131,33 @@ class ES815BitFlatVectorsFormat extends FlatVectorsFormat {
}
static class HammingScorerSupplier implements RandomVectorScorerSupplier {
private final ByteVectorValues byteValues, byteValues1, byteValues2;
private final ByteVectorValues byteValues, targetValues;
HammingScorerSupplier(ByteVectorValues byteValues) throws IOException {
this.byteValues = byteValues;
this.byteValues1 = byteValues.copy();
this.byteValues2 = byteValues.copy();
this.targetValues = byteValues.copy();
}
@Override
public RandomVectorScorer scorer(int i) throws IOException {
byte[] query = byteValues1.vectorValue(i);
return new HammingVectorScorer(byteValues2, query);
// Ordinal-free variant: the returned scorer has no query vector until setScoringOrdinal(int) is called.
public UpdateableRandomVectorScorer scorer() throws IOException {
return new UpdateableRandomVectorScorer.AbstractUpdateableRandomVectorScorer(targetValues) {
// Private buffer holding a copy of the vector at the current scoring ordinal.
private final byte[] query = new byte[targetValues.dimension()];
// Ordinal whose vector is currently held in `query`; -1 means none has been loaded yet.
private int currentOrd = -1;
@Override
public void setScoringOrdinal(int i) throws IOException {
// Same ordinal as last time: `query` is already up to date.
if (currentOrd == i) {
return;
}
// Copied into `query` instead of keeping the returned array, since score() reads from the same
// targetValues instance again (presumably vectorValue may reuse its buffer -- TODO confirm).
System.arraycopy(targetValues.vectorValue(i), 0, query, 0, query.length);
this.currentOrd = i;
}
@Override
public float score(int i) throws IOException {
// Hamming score between the vector at ordinal i and the buffered query vector.
return hammingScore(targetValues.vectorValue(i), query);
}
};
}
@Override

View file

@ -26,6 +26,7 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.VectorUtil;
import org.apache.lucene.util.hnsw.RandomVectorScorer;
import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier;
import org.apache.lucene.util.hnsw.UpdateableRandomVectorScorer;
import org.elasticsearch.index.codec.vectors.BQSpaceUtils;
import org.elasticsearch.simdvec.ESVectorUtil;
@ -76,8 +77,20 @@ public class ES818BinaryFlatVectorsScorer implements FlatVectorsScorer {
byte[] quantized = new byte[BQSpaceUtils.B_QUERY * binarizedVectors.discretizedDimensions() / 8];
OptimizedScalarQuantizer.QuantizationResult queryCorrections = quantizer.scalarQuantize(target, initial, (byte) 4, centroid);
BQSpaceUtils.transposeHalfByte(initial, quantized);
BinaryQueryVector queryVector = new BinaryQueryVector(quantized, queryCorrections);
return new BinarizedRandomVectorScorer(queryVector, binarizedVectors, similarityFunction);
return new RandomVectorScorer.AbstractRandomVectorScorer(vectorValues) {
@Override
public float score(int i) throws IOException {
return quantizedScore(
binarizedVectors.dimension(),
similarityFunction,
binarizedVectors.getCentroidDP(),
quantized,
queryCorrections,
binarizedVectors.vectorValue(i),
binarizedVectors.getCorrectiveTerms(i)
);
}
};
}
return nonQuantizedDelegate.getRandomVectorScorer(similarityFunction, vectorValues, target);
}
@ -121,68 +134,95 @@ public class ES818BinaryFlatVectorsScorer implements FlatVectorsScorer {
}
@Override
public RandomVectorScorer scorer(int ord) throws IOException {
byte[] vector = queryVectors.vectorValue(ord);
OptimizedScalarQuantizer.QuantizationResult correctiveTerms = queryVectors.getCorrectiveTerms(ord);
BinaryQueryVector binaryQueryVector = new BinaryQueryVector(vector, correctiveTerms);
return new BinarizedRandomVectorScorer(binaryQueryVector, targetVectors, similarityFunction);
// Ordinal-free variant: every scorer is built over its own copy() of the query and target values.
public BinarizedRandomVectorScorer scorer() throws IOException {
return new BinarizedRandomVectorScorer(queryVectors.copy(), targetVectors.copy(), similarityFunction);
}
@Override
public RandomVectorScorerSupplier copy() throws IOException {
return new BinarizedRandomVectorScorerSupplier(queryVectors.copy(), targetVectors.copy(), similarityFunction);
// Variant that passes the same queryVectors/targetVectors instances to the new supplier without copying them.
return new BinarizedRandomVectorScorerSupplier(queryVectors, targetVectors, similarityFunction);
}
}
/** A binarized query: the quantized vector bytes paired with their {@link OptimizedScalarQuantizer.QuantizationResult}. */
public record BinaryQueryVector(byte[] vector, OptimizedScalarQuantizer.QuantizationResult quantizationResult) {}
/** Vector scorer over binarized vector values */
public static class BinarizedRandomVectorScorer extends RandomVectorScorer.AbstractRandomVectorScorer {
private final BinaryQueryVector queryVector;
public static class BinarizedRandomVectorScorer extends UpdateableRandomVectorScorer.AbstractUpdateableRandomVectorScorer {
private final ES818BinaryQuantizedVectorsWriter.OffHeapBinarizedQueryVectorValues queryVectors;
private final BinarizedByteVectorValues targetVectors;
private final VectorSimilarityFunction similarityFunction;
// Private buffer that receives a copy of the query vector selected by setScoringOrdinal.
private final byte[] quantizedQuery;
// Corrective terms of the selected query vector; null until setScoringOrdinal has been called.
private OptimizedScalarQuantizer.QuantizationResult queryCorrections = null;
// Ordinal currently loaded into quantizedQuery/queryCorrections; -1 means none.
private int currentOrdinal = -1;
public BinarizedRandomVectorScorer(
BinaryQueryVector queryVectors,
BinarizedRandomVectorScorer(
ES818BinaryQuantizedVectorsWriter.OffHeapBinarizedQueryVectorValues queryVectors,
BinarizedByteVectorValues targetVectors,
VectorSimilarityFunction similarityFunction
) {
super(targetVectors);
this.queryVector = queryVectors;
this.queryVectors = queryVectors;
// Sized from the query values' quantizedDimension() so a whole quantized query vector fits.
this.quantizedQuery = new byte[queryVectors.quantizedDimension()];
this.targetVectors = targetVectors;
this.similarityFunction = similarityFunction;
}
@Override
public float score(int targetOrd) throws IOException {
byte[] quantizedQuery = queryVector.vector();
byte[] binaryCode = targetVectors.vectorValue(targetOrd);
float qcDist = ESVectorUtil.ipByteBinByte(quantizedQuery, binaryCode);
OptimizedScalarQuantizer.QuantizationResult queryCorrections = queryVector.quantizationResult();
OptimizedScalarQuantizer.QuantizationResult indexCorrections = targetVectors.getCorrectiveTerms(targetOrd);
float x1 = indexCorrections.quantizedComponentSum();
float ax = indexCorrections.lowerInterval();
// Here we assume `lx` is simply bit vectors, so the scaling isn't necessary
float lx = indexCorrections.upperInterval() - ax;
float ay = queryCorrections.lowerInterval();
float ly = (queryCorrections.upperInterval() - ay) * FOUR_BIT_SCALE;
float y1 = queryCorrections.quantizedComponentSum();
float score = ax * ay * targetVectors.dimension() + ay * lx * x1 + ax * ly * y1 + lx * ly * qcDist;
// For euclidean, we need to invert the score and apply the additional correction, which is
// assumed to be the squared l2norm of the centroid centered vectors.
if (similarityFunction == EUCLIDEAN) {
score = queryCorrections.additionalCorrection() + indexCorrections.additionalCorrection() - 2 * score;
return Math.max(1 / (1f + score), 0);
} else {
// For cosine and max inner product, we need to apply the additional correction, which is
// assumed to be the non-centered dot-product between the vector and the centroid
score += queryCorrections.additionalCorrection() + indexCorrections.additionalCorrection() - targetVectors.getCentroidDP();
if (similarityFunction == MAXIMUM_INNER_PRODUCT) {
return VectorUtil.scaleMaxInnerProductScore(score);
}
return Math.max((1f + score) / 2f, 0);
// No query vector has been selected yet, so there is nothing to score against.
if (queryCorrections == null) {
throw new IllegalStateException("score() called before setScoringOrdinal()");
}
// Delegates the arithmetic to the shared static helper, passing the buffered query and its corrections.
return quantizedScore(
targetVectors.dimension(),
similarityFunction,
targetVectors.getCentroidDP(),
quantizedQuery,
queryCorrections,
targetVectors.vectorValue(targetOrd),
targetVectors.getCorrectiveTerms(targetOrd)
);
}
@Override
public void setScoringOrdinal(int i) throws IOException {
// Same ordinal as the one already loaded: keep the buffered state.
if (i == currentOrdinal) {
return;
}
// Copy the query vector into the private buffer, then fetch its corrective terms for the same ordinal.
System.arraycopy(queryVectors.vectorValue(i), 0, quantizedQuery, 0, quantizedQuery.length);
queryCorrections = queryVectors.getCorrectiveTerms(i);
currentOrdinal = i;
}
}
/**
 * Computes the similarity score between a quantized query vector and a binarized index vector
 * from their bit-wise inner product and the corrective terms of both sides.
 *
 * @param dims               number of dimensions used to scale the lower-interval product
 * @param similarityFunction selects the euclidean, maximum-inner-product or default scaling
 * @param centroidDp         centroid dot-product subtracted in the non-euclidean branch
 * @param q                  quantized query bytes
 * @param queryCorrections   corrective terms of the query vector
 * @param d                  binarized index vector bytes
 * @param indexCorrections   corrective terms of the index vector
 * @return the score after the similarity-specific correction and scaling
 */
private static float quantizedScore(
    int dims,
    VectorSimilarityFunction similarityFunction,
    float centroidDp,
    byte[] q,
    OptimizedScalarQuantizer.QuantizationResult queryCorrections,
    byte[] d,
    OptimizedScalarQuantizer.QuantizationResult indexCorrections
) {
    final float bitDot = ESVectorUtil.ipByteBinByte(q, d);

    // Index side. Here we assume the index width applies to plain bit vectors, so no scaling is needed.
    final float indexLower = indexCorrections.lowerInterval();
    final float indexWidth = indexCorrections.upperInterval() - indexLower;
    final float indexSum = indexCorrections.quantizedComponentSum();

    // Query side: the interval width is scaled by FOUR_BIT_SCALE.
    final float queryLower = queryCorrections.lowerInterval();
    final float queryWidth = (queryCorrections.upperInterval() - queryLower) * FOUR_BIT_SCALE;
    final float querySum = queryCorrections.quantizedComponentSum();

    final float estimate = indexLower * queryLower * dims + queryLower * indexWidth * indexSum + indexLower * queryWidth * querySum
        + indexWidth * queryWidth * bitDot;
    final float correctionSum = queryCorrections.additionalCorrection() + indexCorrections.additionalCorrection();

    if (similarityFunction == EUCLIDEAN) {
        // For euclidean, the estimate is inverted and the additional correction applied; the correction is
        // assumed to be the squared l2norm of the centroid centered vectors.
        final float distance = correctionSum - 2 * estimate;
        return Math.max(1 / (1f + distance), 0);
    }

    // For cosine and max inner product, the additional correction is assumed to be the non-centered
    // dot-product between the vector and the centroid.
    final float corrected = estimate + (correctionSum - centroidDp);
    return similarityFunction == MAXIMUM_INNER_PRODUCT
        ? VectorUtil.scaleMaxInnerProductScore(corrected)
        : Math.max((1f + corrected) / 2f, 0);
}
}

View file

@ -44,8 +44,8 @@ import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.VectorUtil;
import org.apache.lucene.util.hnsw.CloseableRandomVectorScorerSupplier;
import org.apache.lucene.util.hnsw.RandomVectorScorer;
import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier;
import org.apache.lucene.util.hnsw.UpdateableRandomVectorScorer;
import org.elasticsearch.core.SuppressForbidden;
import org.elasticsearch.index.codec.vectors.BQSpaceUtils;
import org.elasticsearch.index.codec.vectors.BQVectorUtils;
@ -763,6 +763,10 @@ public class ES818BinaryQuantizedVectorsWriter extends FlatVectorsWriter {
);
}
/** Returns the length of the array backing {@code byteBuffer}. */
int quantizedDimension() {
    final byte[] backing = byteBuffer.array();
    return backing.length;
}
/** Returns the {@code size} value held by this instance. */
public int size() {
return size;
}
@ -887,8 +891,8 @@ public class ES818BinaryQuantizedVectorsWriter extends FlatVectorsWriter {
}
@Override
public RandomVectorScorer scorer(int ord) throws IOException {
return supplier.scorer(ord);
// Ordinal-free variant: forwards directly to the wrapped supplier.
public UpdateableRandomVectorScorer scorer() throws IOException {
return supplier.scorer();
}
@Override

View file

@ -58,7 +58,7 @@ class PointsSortedDocsProducer extends SortedDocsProducer {
}
upperBucket = (Long) upperValue;
}
DocIdSetBuilder builder = fillDocIdSet ? new DocIdSetBuilder(context.reader().maxDoc(), values, field) : null;
DocIdSetBuilder builder = fillDocIdSet ? new DocIdSetBuilder(context.reader().maxDoc(), values) : null;
Visitor visitor = new Visitor(context, queue, builder, values.getBytesPerDimension(), lowerBucket, upperBucket);
try {
values.intersect(visitor);

View file

@ -328,7 +328,11 @@ public abstract class FiltersAggregator extends BucketsAggregator {
hasOtherBucket
);
}
return new MultiFilterLeafCollector(sub, filterWrappers, numFilters, totalNumKeys, usesCompetitiveIterator, hasOtherBucket);
if (usesCompetitiveIterator) {
return new MultiFilterCompetitiveLeafCollector(sub, filterWrappers, numFilters, totalNumKeys, hasOtherBucket);
} else {
return new MultiFilterLeafCollector(sub, filterWrappers, numFilters, totalNumKeys, hasOtherBucket);
}
}
}
@ -400,21 +404,20 @@ public abstract class FiltersAggregator extends BucketsAggregator {
}
}
private class MultiFilterLeafCollector extends AbstractLeafCollector {
private final class MultiFilterLeafCollector extends AbstractLeafCollector {
// A DocIdSetIterator heap with one entry for each filter, ordered by doc ID
final DisiPriorityQueue filterIterators;
DisiPriorityQueue filterIterators;
MultiFilterLeafCollector(
LeafBucketCollector sub,
List<FilterMatchingDisiWrapper> filterWrappers,
int numFilters,
int totalNumKeys,
boolean usesCompetitiveIterator,
boolean hasOtherBucket
) {
super(sub, numFilters, totalNumKeys, usesCompetitiveIterator, hasOtherBucket);
filterIterators = filterWrappers.isEmpty() ? null : new DisiPriorityQueue(filterWrappers.size());
super(sub, numFilters, totalNumKeys, false, hasOtherBucket);
filterIterators = filterWrappers.isEmpty() ? null : DisiPriorityQueue.ofMaxSize(filterWrappers.size());
for (FilterMatchingDisiWrapper wrapper : filterWrappers) {
filterIterators.add(wrapper);
}
@ -423,7 +426,7 @@ public abstract class FiltersAggregator extends BucketsAggregator {
public void collect(int doc, long bucket) throws IOException {
boolean matched = false;
if (filterIterators != null) {
// Advance filters if necessary. Filters will already be advanced if used as a competitive iterator.
// Advance filters if necessary.
DisiWrapper top = filterIterators.top();
while (top.doc < doc) {
top.doc = top.approximation.advance(doc);
@ -448,16 +451,51 @@ public abstract class FiltersAggregator extends BucketsAggregator {
}
@Override
public DocIdSetIterator competitiveIterator() throws IOException {
if (usesCompetitiveIterator) {
// A DocIdSetIterator view of the filterIterators heap
assert filterIterators != null;
return new DisjunctionDISIApproximation(filterIterators);
}
public DocIdSetIterator competitiveIterator() {
return null;
}
}
/**
 * Leaf collector for several filters that also exposes those filters as a competitive iterator.
 * <p>
 * All filter wrappers are combined into a single {@link DisjunctionDISIApproximation}; that same
 * instance is advanced in {@link #collect(int, long)} and returned from {@link #competitiveIterator()}.
 */
private final class MultiFilterCompetitiveLeafCollector extends AbstractLeafCollector {
    /** Disjunction built from every filter wrapper handed to the constructor. */
    private final DisjunctionDISIApproximation disjunctionDisi;

    /**
     * @param sub            sub-collector that receives the bucketed documents
     * @param filterWrappers one wrapper per filter; must not be empty
     * @param numFilters     number of filters, also used as the ordinal of the "other" bucket
     * @param totalNumKeys   forwarded unchanged to the parent constructor
     * @param hasOtherBucket whether documents matching no filter go to the "other" bucket
     */
    MultiFilterCompetitiveLeafCollector(
        LeafBucketCollector sub,
        List<FilterMatchingDisiWrapper> filterWrappers,
        int numFilters,
        int totalNumKeys,
        boolean hasOtherBucket
    ) {
        // This collector always uses a competitive iterator, hence the hard-coded `true`.
        super(sub, numFilters, totalNumKeys, true, hasOtherBucket);
        assert filterWrappers.isEmpty() == false;
        disjunctionDisi = DisjunctionDISIApproximation.of(filterWrappers, Long.MAX_VALUE);
    }

    /**
     * Collects {@code doc} into the bucket of each filter that matches it and, when enabled,
     * into the "other" bucket if no filter matched.
     */
    @Override
    public void collect(int doc, long bucket) throws IOException {
        boolean matched = false;
        int target = disjunctionDisi.advance(doc);
        if (target == doc) {
            // Walk the wrappers positioned on `doc`; checkDocForMatch decides whether each one really matches.
            for (DisiWrapper w = disjunctionDisi.topList(); w != null; w = w.next) {
                FilterMatchingDisiWrapper topMatch = (FilterMatchingDisiWrapper) w;
                if (topMatch.checkDocForMatch(doc)) {
                    collectBucket(sub, doc, bucketOrd(bucket, topMatch.filterOrd));
                    matched = true;
                }
            }
        }
        if (hasOtherBucket && false == matched) {
            collectBucket(sub, doc, bucketOrd(bucket, numFilters));
        }
    }

    /** Returns the disjunction over all filters, the same instance that {@code collect} advances. */
    @Override
    public DocIdSetIterator competitiveIterator() {
        return disjunctionDisi;
    }
}
private static class FilterMatchingDisiWrapper extends DisiWrapper {
final int filterOrd;

View file

@ -56,7 +56,6 @@ import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.suggest.document.Completion101PostingsFormat;
import org.apache.lucene.search.suggest.document.CompletionPostingsFormat;
import org.apache.lucene.search.suggest.document.SuggestField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FilterDirectory;
@ -331,7 +330,7 @@ public class IndexDiskUsageAnalyzerTests extends ESTestCase {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
if (field.startsWith("suggest_")) {
return new Completion101PostingsFormat(randomFrom(CompletionPostingsFormat.FSTLoadMode.values()));
return new Completion101PostingsFormat();
} else {
return super.postingsFormat();
}

View file

@ -26,6 +26,7 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.VectorUtil;
import org.apache.lucene.util.hnsw.RandomVectorScorer;
import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier;
import org.apache.lucene.util.hnsw.UpdateableRandomVectorScorer;
import org.elasticsearch.index.codec.vectors.BQSpaceUtils;
import org.elasticsearch.index.codec.vectors.BQVectorUtils;
import org.elasticsearch.simdvec.ESVectorUtil;
@ -120,28 +121,48 @@ class ES816BinaryFlatRWVectorsScorer implements FlatVectorsScorer {
}
@Override
public RandomVectorScorer scorer(int ord) throws IOException {
byte[] vector = queryVectors.vectorValue(ord);
int quantizedSum = queryVectors.sumQuantizedValues(ord);
float distanceToCentroid = queryVectors.getCentroidDistance(ord);
float lower = queryVectors.getLower(ord);
float width = queryVectors.getWidth(ord);
float normVmC = 0f;
float vDotC = 0f;
if (similarityFunction != EUCLIDEAN) {
normVmC = queryVectors.getNormVmC(ord);
vDotC = queryVectors.getVDotC(ord);
}
BinaryQueryVector binaryQueryVector = new BinaryQueryVector(
vector,
new BinaryQuantizer.QueryFactors(quantizedSum, distanceToCentroid, lower, width, normVmC, vDotC)
);
return new BinarizedRandomVectorScorer(binaryQueryVector, targetVectors, similarityFunction);
// Ordinal-free variant: the returned scorer loads its query vector and factors in setScoringOrdinal(int).
public UpdateableRandomVectorScorer scorer() throws IOException {
// Buffer captured by the anonymous scorer; it receives a copy of the selected query vector.
byte[] queryVector = new byte[queryVectors.quantizedDimension()];
return new UpdateableRandomVectorScorer.AbstractUpdateableRandomVectorScorer(targetVectors) {
// Ordinal currently loaded; -1 means none.
private int ord = -1;
// Query factors for `ord`; null until setScoringOrdinal has been called.
private BinaryQuantizer.QueryFactors factors = null;
// The two fields below shadow the supplier's fields of the same name with per-scorer copy() instances.
private final ES816BinaryQuantizedVectorsWriter.OffHeapBinarizedQueryVectorValues queryVectors =
BinarizedRandomVectorScorerSupplier.this.queryVectors.copy();
private final BinarizedByteVectorValues targetVectors = BinarizedRandomVectorScorerSupplier.this.targetVectors.copy();
@Override
public void setScoringOrdinal(int i) throws IOException {
// Same ordinal as the one already loaded: keep the buffered state.
if (i == ord) {
return;
}
ord = i;
System.arraycopy(queryVectors.vectorValue(i), 0, queryVector, 0, queryVector.length);
int quantizedSum = queryVectors.sumQuantizedValues(ord);
float distanceToCentroid = queryVectors.getCentroidDistance(ord);
float lower = queryVectors.getLower(ord);
float width = queryVectors.getWidth(ord);
// normVmC and vDotC are only read for non-euclidean similarities; they stay 0 otherwise.
float normVmC = 0f;
float vDotC = 0f;
if (similarityFunction != EUCLIDEAN) {
normVmC = queryVectors.getNormVmC(ord);
vDotC = queryVectors.getVDotC(ord);
}
factors = new BinaryQuantizer.QueryFactors(quantizedSum, distanceToCentroid, lower, width, normVmC, vDotC);
}
@Override
public float score(int i) throws IOException {
// No query has been selected yet, so there is nothing to score against.
if (factors == null) {
throw new IllegalStateException("setScoringOrdinal must be called before score");
}
return quantizedScore(this.targetVectors, i, queryVector, factors, similarityFunction);
}
};
}
@Override
public RandomVectorScorerSupplier copy() throws IOException {
return new BinarizedRandomVectorScorerSupplier(queryVectors.copy(), targetVectors.copy(), similarityFunction);
// Variant that passes the same queryVectors/targetVectors instances to the new supplier without copying them.
return new BinarizedRandomVectorScorerSupplier(queryVectors, targetVectors, similarityFunction);
}
}
@ -154,9 +175,6 @@ class ES816BinaryFlatRWVectorsScorer implements FlatVectorsScorer {
private final BinarizedByteVectorValues targetVectors;
private final VectorSimilarityFunction similarityFunction;
private final float sqrtDimensions;
private final float maxX1;
BinarizedRandomVectorScorer(
BinaryQueryVector queryVectors,
BinarizedByteVectorValues targetVectors,
@ -166,91 +184,97 @@ class ES816BinaryFlatRWVectorsScorer implements FlatVectorsScorer {
this.queryVector = queryVectors;
this.targetVectors = targetVectors;
this.similarityFunction = similarityFunction;
// FIXME: precompute this once?
this.sqrtDimensions = targetVectors.sqrtDimensions();
this.maxX1 = targetVectors.maxX1();
}
@Override
public float score(int targetOrd) throws IOException {
byte[] quantizedQuery = queryVector.vector();
int quantizedSum = queryVector.factors().quantizedSum();
float lower = queryVector.factors().lower();
float width = queryVector.factors().width();
float distanceToCentroid = queryVector.factors().distToC();
if (similarityFunction == EUCLIDEAN) {
return euclideanScore(targetOrd, sqrtDimensions, quantizedQuery, distanceToCentroid, lower, quantizedSum, width);
}
float vmC = queryVector.factors().normVmC();
float vDotC = queryVector.factors().vDotC();
float cDotC = targetVectors.getCentroidDP();
byte[] binaryCode = targetVectors.vectorValue(targetOrd);
float ooq = targetVectors.getOOQ(targetOrd);
float normOC = targetVectors.getNormOC(targetOrd);
float oDotC = targetVectors.getODotC(targetOrd);
float qcDist = ESVectorUtil.ipByteBinByte(quantizedQuery, binaryCode);
// FIXME: pre-compute these only once for each target vector
// ... pull this out or use a similar cache mechanism as do in score
float xbSum = (float) BQVectorUtils.popcount(binaryCode);
final float dist;
// If ||o-c|| == 0, so, it's ok to throw the rest of the equation away
// and simply use `oDotC + vDotC - cDotC` as centroid == doc vector
if (normOC == 0 || ooq == 0) {
dist = oDotC + vDotC - cDotC;
} else {
// If ||o-c|| != 0, we should assume that `ooq` is finite
assert Float.isFinite(ooq);
float estimatedDot = (2 * width / sqrtDimensions * qcDist + 2 * lower / sqrtDimensions * xbSum - width / sqrtDimensions
* quantizedSum - sqrtDimensions * lower) / ooq;
dist = vmC * normOC * estimatedDot + oDotC + vDotC - cDotC;
}
assert Float.isFinite(dist);
float ooqSqr = (float) Math.pow(ooq, 2);
float errorBound = (float) (vmC * normOC * (maxX1 * Math.sqrt((1 - ooqSqr) / ooqSqr)));
float score = Float.isFinite(errorBound) ? dist - errorBound : dist;
if (similarityFunction == MAXIMUM_INNER_PRODUCT) {
return VectorUtil.scaleMaxInnerProductScore(score);
}
return Math.max((1f + score) / 2f, 0);
}
private float euclideanScore(
int targetOrd,
float sqrtDimensions,
byte[] quantizedQuery,
float distanceToCentroid,
float lower,
int quantizedSum,
float width
) throws IOException {
byte[] binaryCode = targetVectors.vectorValue(targetOrd);
// FIXME: pre-compute these only once for each target vector
// .. not sure how to enumerate the target ordinals but that's what we did in PoC
float targetDistToC = targetVectors.getCentroidDistance(targetOrd);
float x0 = targetVectors.getVectorMagnitude(targetOrd);
float sqrX = targetDistToC * targetDistToC;
double xX0 = targetDistToC / x0;
// TODO maybe store?
float xbSum = (float) BQVectorUtils.popcount(binaryCode);
float factorPPC = (float) (-2.0 / sqrtDimensions * xX0 * (xbSum * 2.0 - targetVectors.dimension()));
float factorIP = (float) (-2.0 / sqrtDimensions * xX0);
long qcDist = ESVectorUtil.ipByteBinByte(quantizedQuery, binaryCode);
float score = sqrX + distanceToCentroid + factorPPC * lower + (qcDist * 2 - quantizedSum) * factorIP * width;
float projectionDist = (float) Math.sqrt(xX0 * xX0 - targetDistToC * targetDistToC);
float error = 2.0f * maxX1 * projectionDist;
float y = (float) Math.sqrt(distanceToCentroid);
float errorBound = y * error;
if (Float.isFinite(errorBound)) {
score = score + errorBound;
}
return Math.max(1 / (1f + score), 0);
return quantizedScore(targetVectors, targetOrd, queryVector.vector(), queryVector.factors(), similarityFunction);
}
}
/**
 * Scores one target vector against a quantized query, dispatching on the similarity function:
 * {@code EUCLIDEAN} uses the euclidean estimator, everything else the dot-product estimator.
 *
 * @param targetVectors      binarized index vectors to read the target from
 * @param targetOrd          ordinal of the target vector
 * @param quantizedQuery     quantized query bytes
 * @param queryFactors       per-query factors that accompany the quantized bytes
 * @param similarityFunction similarity that selects the estimator
 * @return the score produced by the selected estimator
 * @throws IOException propagated from the selected estimator
 */
private static float quantizedScore(
    BinarizedByteVectorValues targetVectors,
    int targetOrd,
    byte[] quantizedQuery,
    BinaryQuantizer.QueryFactors queryFactors,
    VectorSimilarityFunction similarityFunction
) throws IOException {
    return similarityFunction == EUCLIDEAN
        ? euclideanQuantizedScore(targetVectors, targetOrd, queryFactors, quantizedQuery)
        : dotProductQuantizedScore(targetVectors, targetOrd, quantizedQuery, queryFactors, similarityFunction);
}
/**
 * Dot-product style score between a quantized query and the binarized target vector at {@code targetOrd}.
 * <p>
 * The estimated distance is reduced by an error bound when that bound is finite. The result is passed
 * through {@code VectorUtil.scaleMaxInnerProductScore} for {@code MAXIMUM_INNER_PRODUCT}, and is
 * otherwise mapped to {@code max((1 + score) / 2, 0)}.
 *
 * @param targetVectors      binarized index vectors and their per-vector correction values
 * @param targetOrd          ordinal of the target vector
 * @param quantizedQuery     quantized query bytes
 * @param queryFactors       per-query factors (normVmC, vDotC, width, lower, quantizedSum are read here)
 * @param similarityFunction only checked against {@code MAXIMUM_INNER_PRODUCT} for the final scaling
 * @throws IOException if reading from {@code targetVectors} fails
 */
private static float dotProductQuantizedScore(
BinarizedByteVectorValues targetVectors,
int targetOrd,
byte[] quantizedQuery,
BinaryQuantizer.QueryFactors queryFactors,
VectorSimilarityFunction similarityFunction
) throws IOException {
float vmC = queryFactors.normVmC();
float vDotC = queryFactors.vDotC();
float cDotC = targetVectors.getCentroidDP();
byte[] binaryCode = targetVectors.vectorValue(targetOrd);
float ooq = targetVectors.getOOQ(targetOrd);
float normOC = targetVectors.getNormOC(targetOrd);
float oDotC = targetVectors.getODotC(targetOrd);
float qcDist = ESVectorUtil.ipByteBinByte(quantizedQuery, binaryCode);
// Number of set bits in the target's binary code.
float xbSum = (float) BQVectorUtils.popcount(binaryCode);
final float dist;
// If ||o-c|| == 0 the centroid equals the doc vector, so the rest of the equation can be dropped
// and `oDotC + vDotC - cDotC` used directly.
if (normOC == 0 || ooq == 0) {
dist = oDotC + vDotC - cDotC;
} else {
// If ||o-c|| != 0, we should assume that `ooq` is finite
assert Float.isFinite(ooq);
float estimatedDot = (2 * queryFactors.width() / targetVectors.sqrtDimensions() * qcDist + 2 * queryFactors.lower()
/ targetVectors.sqrtDimensions() * xbSum - queryFactors.width() / targetVectors.sqrtDimensions() * queryFactors
.quantizedSum() - targetVectors.sqrtDimensions() * queryFactors.lower()) / ooq;
dist = vmC * normOC * estimatedDot + oDotC + vDotC - cDotC;
}
assert Float.isFinite(dist);
float ooqSqr = (float) Math.pow(ooq, 2);
float errorBound = (float) (vmC * normOC * (targetVectors.maxX1() * Math.sqrt((1 - ooqSqr) / ooqSqr)));
// A non-finite bound (e.g. when ooq is 0) is ignored rather than applied.
float score = Float.isFinite(errorBound) ? dist - errorBound : dist;
if (similarityFunction == MAXIMUM_INNER_PRODUCT) {
return VectorUtil.scaleMaxInnerProductScore(score);
}
return Math.max((1f + score) / 2f, 0);
}
/**
 * Euclidean score between a quantized query and the binarized target vector at {@code targetOrd}.
 * <p>
 * The estimated distance is increased by an error bound when that bound is finite, and the result is
 * mapped to {@code max(1 / (1 + score), 0)}.
 *
 * @param targetVectors  binarized index vectors and their per-vector correction values
 * @param targetOrd      ordinal of the target vector
 * @param factors        per-query factors (distToC, lower, width, quantizedSum are read here)
 * @param quantizedQuery quantized query bytes
 * @throws IOException if reading from {@code targetVectors} fails
 */
private static float euclideanQuantizedScore(
BinarizedByteVectorValues targetVectors,
int targetOrd,
BinaryQuantizer.QueryFactors factors,
byte[] quantizedQuery
) throws IOException {
byte[] binaryCode = targetVectors.vectorValue(targetOrd);
float targetDistToC = targetVectors.getCentroidDistance(targetOrd);
float x0 = targetVectors.getVectorMagnitude(targetOrd);
float sqrX = targetDistToC * targetDistToC;
// The division is done in float and only then widened to double.
double xX0 = targetDistToC / x0;
// Number of set bits in the target's binary code.
float xbSum = (float) BQVectorUtils.popcount(binaryCode);
float factorPPC = (float) (-2.0 / targetVectors.sqrtDimensions() * xX0 * (xbSum * 2.0 - targetVectors.dimension()));
float factorIP = (float) (-2.0 / targetVectors.sqrtDimensions() * xX0);
long qcDist = ESVectorUtil.ipByteBinByte(quantizedQuery, binaryCode);
float score = sqrX + factors.distToC() + factorPPC * factors.lower() + (qcDist * 2 - factors.quantizedSum()) * factorIP * factors
.width();
float projectionDist = (float) Math.sqrt(xX0 * xX0 - targetDistToC * targetDistToC);
float error = 2.0f * targetVectors.maxX1() * projectionDist;
float y = (float) Math.sqrt(factors.distToC());
float errorBound = y * error;
// A non-finite bound (e.g. NaN from the square root of a negative value) is ignored rather than applied.
if (Float.isFinite(errorBound)) {
score = score + errorBound;
}
return Math.max(1 / (1f + score), 0);
}
}

View file

@ -45,8 +45,8 @@ import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.VectorUtil;
import org.apache.lucene.util.hnsw.CloseableRandomVectorScorerSupplier;
import org.apache.lucene.util.hnsw.RandomVectorScorer;
import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier;
import org.apache.lucene.util.hnsw.UpdateableRandomVectorScorer;
import org.elasticsearch.core.SuppressForbidden;
import org.elasticsearch.index.codec.vectors.BQSpaceUtils;
import org.elasticsearch.index.codec.vectors.BQVectorUtils;
@ -822,6 +822,10 @@ class ES816BinaryQuantizedVectorsWriter extends FlatVectorsWriter {
return dimension;
}
/** Returns the length of the array backing {@code byteBuffer}. */
public int quantizedDimension() {
    final byte[] backing = byteBuffer.array();
    return backing.length;
}
/** Returns a new instance over a clone of {@code slice}, with the same dimension, size and similarity function. */
public OffHeapBinarizedQueryVectorValues copy() throws IOException {
return new OffHeapBinarizedQueryVectorValues(slice.clone(), dimension, size, vectorSimilarityFunction);
}
@ -959,8 +963,8 @@ class ES816BinaryQuantizedVectorsWriter extends FlatVectorsWriter {
}
@Override
public RandomVectorScorer scorer(int ord) throws IOException {
return supplier.scorer(ord);
// Ordinal-free variant: forwards directly to the wrapped supplier.
public UpdateableRandomVectorScorer scorer() throws IOException {
return supplier.scorer();
}
@Override

View file

@ -15,6 +15,8 @@ import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
@ -36,11 +38,12 @@ public class QueryFeatureExtractor implements FeatureExtractor {
}
this.featureNames = featureNames;
this.weights = weights;
this.subScorers = new DisiPriorityQueue(weights.size());
this.subScorers = DisiPriorityQueue.ofMaxSize(weights.size());
}
@Override
public void setNextReader(LeafReaderContext segmentContext) throws IOException {
Collection<FeatureDisiWrapper> disiWrappers = new ArrayList<>();
subScorers.clear();
for (int i = 0; i < weights.size(); i++) {
var weight = weights.get(i);
@ -51,11 +54,13 @@ public class QueryFeatureExtractor implements FeatureExtractor {
if (scorerSupplier != null) {
var scorer = scorerSupplier.get(0L);
if (scorer != null) {
subScorers.add(new FeatureDisiWrapper(scorer, featureNames.get(i)));
FeatureDisiWrapper featureDisiWrapper = new FeatureDisiWrapper(scorer, featureNames.get(i));
subScorers.add(featureDisiWrapper);
disiWrappers.add(featureDisiWrapper);
}
}
}
approximation = subScorers.size() > 0 ? new DisjunctionDISIApproximation(subScorers) : null;
approximation = subScorers.size() > 0 ? new DisjunctionDISIApproximation(disiWrappers, Long.MAX_VALUE) : null;
}
@Override
@ -69,7 +74,7 @@ public class QueryFeatureExtractor implements FeatureExtractor {
if (approximation.docID() != docId) {
return;
}
var w = (FeatureDisiWrapper) subScorers.topList();
var w = (FeatureDisiWrapper) approximation.topList();
for (; w != null; w = (FeatureDisiWrapper) w.next) {
if (w.twoPhaseView == null || w.twoPhaseView.matches()) {
featureMap.put(w.featureName, w.scorable.score());

View file

@ -407,6 +407,9 @@ public class WildcardFieldMapper extends FieldMapper {
public static Query toApproximationQuery(RegExp r) throws IllegalArgumentException {
Query result = null;
switch (r.kind) {
case REGEXP_CHAR_CLASS:
result = createCharacterClassQuery(r);
break;
case REGEXP_UNION:
result = createUnionQuery(r);
break;
@ -426,7 +429,6 @@ public class WildcardFieldMapper extends FieldMapper {
// Repeat is zero or more times so zero matches = match all
result = new MatchAllDocsQuery();
break;
case REGEXP_REPEAT_MIN:
case REGEXP_REPEAT_MINMAX:
if (r.min > 0) {
@ -458,7 +460,7 @@ public class WildcardFieldMapper extends FieldMapper {
case REGEXP_INTERVAL:
case REGEXP_EMPTY:
case REGEXP_AUTOMATON:
case REGEXP_PRE_CLASS:
// case REGEXP_PRE_CLASS:
result = new MatchAllDocsQuery();
break;
}
@ -496,11 +498,35 @@ public class WildcardFieldMapper extends FieldMapper {
}
/**
 * Builds the approximation query for a character-class node.
 * <p>
 * Each entry whose {@code from} and {@code to} are equal becomes a term query on its lower-cased
 * character, and the term queries are combined by {@code formQuery}. A class with more entries than
 * {@code MAX_CLAUSES_IN_APPROXIMATION_QUERY}, or one containing any real range, yields a match-all query.
 */
private static Query createCharacterClassQuery(RegExp r) {
    // TODO: consider expanding this to allow for character ranges as well (need additional tests and performance eval)
    final int entryCount = r.from.length;
    if (entryCount > MAX_CLAUSES_IN_APPROXIMATION_QUERY) {
        return new MatchAllDocsQuery();
    }
    List<Query> charQueries = new ArrayList<>();
    for (int idx = 0; idx < entryCount; idx++) {
        if (r.from[idx] != r.to[idx]) {
            // immediately exit because we can't currently optimize a combination of range and classes
            return new MatchAllDocsQuery();
        }
        // only single characters are handled for now, not ranges
        String lowered = toLowerCase(Character.toString(r.from[idx]));
        charQueries.add(new TermQuery(new Term("", lowered)));
    }
    return formQuery(charQueries);
}
// Builds the approximation query for a union node from the queries found under its two operands.
private static Query createUnionQuery(RegExp r) {
// Create an OR of clauses
ArrayList<Query> queries = new ArrayList<>();
List<Query> queries = new ArrayList<>();
// Both operands are handed to findLeaves with the same output list.
findLeaves(r.exp1, org.apache.lucene.util.automaton.RegExp.Kind.REGEXP_UNION, queries);
findLeaves(r.exp2, org.apache.lucene.util.automaton.RegExp.Kind.REGEXP_UNION, queries);
// The collected queries are combined by formQuery.
return formQuery(queries);
}
private static Query formQuery(List<Query> queries) {
BooleanQuery.Builder bOr = new BooleanQuery.Builder();
HashSet<Query> uniqueClauses = new HashSet<>();
for (Query query : queries) {