mirror of
https://github.com/elastic/elasticsearch.git
synced 2025-06-27 17:10:22 -04:00
Vector test tools (#128934)
This adds some testing tools for verifying vector recall and latency directly without having to spin up an entire ES node and run a rally track. It's pretty barebones and takes inspiration from lucene-util, but I wanted access to our own formats and tooling to make our lives easier. Here is an example config file. This will build the initial index, run queries at num_candidates: 50, then again at num_candidates 100 (without reindexing, and re-using the cached nearest neighbors). ``` [{ "doc_vectors" : "path", "query_vectors" : "path", "num_docs" : 10000, "num_queries" : 10, "index_type" : "hnsw", "num_candidates" : 50, "k" : 10, "hnsw_m" : 16, "hnsw_ef_construction" : 200, "index_threads" : 4, "reindex" : true, "force_merge" : false, "vector_space" : "maximum_inner_product", "dimensions" : 768 }, { "doc_vectors" : "path", "query_vectors" : "path", "num_docs" : 10000, "num_queries" : 10, "index_type" : "hnsw", "num_candidates" : 100, "k" : 10, "hnsw_m" : 16, "hnsw_ef_construction" : 200, "vector_space" : "maximum_inner_product", "dimensions" : 768 } ] ``` To execute: ``` ./gradlew :qa:vector:checkVec --args="/Path/to/knn_tester_config.json" ``` Calling `./gradlew :qa:vector:checkVecHelp` gives some guidance on how to use it, additionally providing a way to run it via java directly (useful to bypass gradlew guff).
This commit is contained in:
parent
ffa8927a9f
commit
155c0da00a
17 changed files with 2312 additions and 6 deletions
101
qa/vector/build.gradle
Normal file
101
qa/vector/build.gradle
Normal file
|
@ -0,0 +1,101 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the "Elastic License
|
||||
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
|
||||
* Public License v 1"; you may not use this file except in compliance with, at
|
||||
* your election, the "Elastic License 2.0", the "GNU Affero General Public
|
||||
* License v3.0 only", or the "Server Side Public License, v 1".
|
||||
*/
|
||||
|
||||
// Standard Elasticsearch build conventions: Java compilation plus the
// common precommit/build checks.
apply plugin: 'elasticsearch.java'
apply plugin: 'elasticsearch.build'
|
||||
|
||||
|
||||
// Every lucene-* artifact shares the single 'lucene' license/notice pair
// in the licenses/ directory, so map them all to one name.
tasks.named('dependencyLicenses').configure {
  mapping from: /lucene-.*/, to: 'lucene'
}
|
||||
|
||||
// NOTE(review): forbidden-APIs checking is switched off for this
// test-tooling module — presumably because the tester uses JDK/Lucene
// APIs the production ruleset forbids; confirm this is intentional.
tasks.named('forbiddenApisMain').configure {
  enabled = false
}
|
||||
|
||||
// Lucene is driven directly (no ES node is started), so core/queries/codecs
// are on the compile classpath as 'api'. The server project supplies the
// Elasticsearch vector formats under test; libs:logging provides the logger.
dependencies {
  api "org.apache.lucene:lucene-core:${versions.lucene}"
  api "org.apache.lucene:lucene-queries:${versions.lucene}"
  api "org.apache.lucene:lucene-codecs:${versions.lucene}"

  implementation project(':libs:logging')
  implementation project(':server')
}
|
||||
/**
 * Task to run the KnnIndexTester with the provided parameters.
 *
 * Usage: ./gradlew :qa:vector:checkVec --args="path/to/config.json"
 */
tasks.register("checkVec", JavaExec) {
  group = "Execution"
  description = "Runs KnnIndexTester with the provided parameters to validate recall and performance."
  classpath = sourceSets.main.runtimeClasspath
  mainClass.set("org.elasticsearch.test.knn.KnnIndexTester")

  // Route log output to the console so results show up in the Gradle run.
  systemProperty "es.logger.out", "console"
  systemProperty "es.logger.level", "INFO" // Change to DEBUG if needed

  // Heap sizing, the vector incubator module, native access and profiling
  // aids are only applied on Java 21+ runtimes.
  // NOTE(review): on runtimes below 21 NO jvmArgs (including -Xms/-Xmx) are
  // set — confirm that is intended rather than gating only the module flags.
  def runtimeMajor = buildParams.getRuntimeJavaVersion().map { it.majorVersion.toInteger() }.get()
  if (runtimeMajor >= 21) {
    jvmArgs(
      '-Xms4g',
      '-Xmx4g',
      '--add-modules=jdk.incubator.vector',
      '--enable-native-access=ALL-UNNAMED',
      '-Djava.util.concurrent.ForkJoinPool.common.parallelism=8',
      '-XX:+UnlockDiagnosticVMOptions',
      '-XX:+DebugNonSafepoints',
      '-XX:+HeapDumpOnOutOfMemoryError'
    )
  }
}
|
||||
|
||||
/**
 * Prints usage guidance for the KnnIndexTester: first the tool's own --help
 * output, then instructions for running via Gradle or directly with java.
 *
 * Fix: the "-Xms4g" continuation line used "\\\\" (printing two backslashes)
 * while every other continuation uses "\\" (printing one), which broke the
 * copy-pasteable shell command. Normalized to "\\".
 */
tasks.register("checkVecHelp", JavaExec) {
  group = "Help"
  description = "Prints help for the KnnIndexTester task."
  classpath = sourceSets.main.runtimeClasspath
  mainClass.set("org.elasticsearch.test.knn.KnnIndexTester")
  args = ["--help"]
  doLast {
    println """
    =============================================================================
    KnnIndexTester Help
    =============================================================================

    Run with Gradle:
    ----------------
    # Using default configuration file
    ./gradlew :qa:vector:checkVec

    # Using custom configuration file
    ./gradlew :qa:vector:checkVec --args="path/to/your/config.json"

    # Adjust heap size
    ./gradlew :qa:vector:checkVec -Dorg.gradle.jvmargs="-Xmx8g" --args="path/to/your/config.json"

    # Set environment variable for more extensive JVM options
    export GRADLE_OPTS="-Xmx8g -XX:+UseG1GC -XX:MaxGCPauseMillis=100"
    ./gradlew :qa:vector:checkVec


    Run directly with Java:
    ----------------------
    # Generate classpath (run once to create the file)
    ./gradlew :qa:vector:printClasspath

    # Then use the classpath file with java
    java -cp "\$(cat build/vector_classpath.txt)" \\
      --add-modules=jdk.incubator.vector \\
      --enable-native-access=ALL-UNNAMED \\
      -Djava.util.concurrent.ForkJoinPool.common.parallelism=8 \\
      -Xmx4g \\
      -Xms4g \\
      org.elasticsearch.test.knn.KnnIndexTester path/to/your/config.json
    """
  }
}
|
||||
|
||||
// Writes the module's runtime classpath to build/vector_classpath.txt so the
// tester can be launched with plain `java -cp` (bypassing Gradle entirely).
tasks.register("printClasspath") {
  group = "Help"
  description = "Prints the classpath needed to run KnnIndexTester directly with java"

  doLast {
    def outFile = new File("${buildDir}/vector_classpath.txt")
    // Ensure the build directory exists before writing.
    outFile.parentFile.mkdirs()
    outFile.text = sourceSets.main.runtimeClasspath.asPath
    println "Classpath written to: ${outFile.absolutePath}"
  }
}
|
475
qa/vector/licenses/lucene-LICENSE.txt
Normal file
475
qa/vector/licenses/lucene-LICENSE.txt
Normal file
|
@ -0,0 +1,475 @@
|
|||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
|
||||
|
||||
Some code in core/src/java/org/apache/lucene/util/UnicodeUtil.java was
|
||||
derived from unicode conversion examples available at
|
||||
http://www.unicode.org/Public/PROGRAMS/CVTUTF. Here is the copyright
|
||||
from those sources:
|
||||
|
||||
/*
|
||||
* Copyright 2001-2004 Unicode, Inc.
|
||||
*
|
||||
* Disclaimer
|
||||
*
|
||||
* This source code is provided as is by Unicode, Inc. No claims are
|
||||
* made as to fitness for any particular purpose. No warranties of any
|
||||
* kind are expressed or implied. The recipient agrees to determine
|
||||
* applicability of information provided. If this file has been
|
||||
* purchased on magnetic or optical media from Unicode, Inc., the
|
||||
* sole remedy for any claim will be exchange of defective media
|
||||
* within 90 days of receipt.
|
||||
*
|
||||
* Limitations on Rights to Redistribute This Code
|
||||
*
|
||||
* Unicode, Inc. hereby grants the right to freely use the information
|
||||
* supplied in this file in the creation of products supporting the
|
||||
* Unicode Standard, and to make copies of this file in any form
|
||||
* for internal or external distribution as long as this notice
|
||||
* remains attached.
|
||||
*/
|
||||
|
||||
|
||||
Some code in core/src/java/org/apache/lucene/util/ArrayUtil.java was
|
||||
derived from Python 2.4.2 sources available at
|
||||
http://www.python.org. Full license is here:
|
||||
|
||||
http://www.python.org/download/releases/2.4.2/license/
|
||||
|
||||
Some code in core/src/java/org/apache/lucene/util/UnicodeUtil.java was
|
||||
derived from Python 3.1.2 sources available at
|
||||
http://www.python.org. Full license is here:
|
||||
|
||||
http://www.python.org/download/releases/3.1.2/license/
|
||||
|
||||
Some code in core/src/java/org/apache/lucene/util/automaton was
|
||||
derived from Brics automaton sources available at
|
||||
www.brics.dk/automaton/. Here is the copyright from those sources:
|
||||
|
||||
/*
|
||||
* Copyright (c) 2001-2009 Anders Moeller
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the author may not be used to endorse or promote products
|
||||
* derived from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
The levenshtein automata tables in core/src/java/org/apache/lucene/util/automaton
|
||||
were automatically generated with the moman/finenight FSA package.
|
||||
Here is the copyright for those sources:
|
||||
|
||||
# Copyright (c) 2010, Jean-Philippe Barrette-LaPierre, <jpb@rrette.com>
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person
|
||||
# obtaining a copy of this software and associated documentation
|
||||
# files (the "Software"), to deal in the Software without
|
||||
# restriction, including without limitation the rights to use,
|
||||
# copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following
|
||||
# conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be
|
||||
# included in all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
# OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
Some code in core/src/java/org/apache/lucene/util/UnicodeUtil.java was
|
||||
derived from ICU (http://www.icu-project.org)
|
||||
The full license is available here:
|
||||
http://source.icu-project.org/repos/icu/icu/trunk/license.html
|
||||
|
||||
/*
|
||||
* Copyright (C) 1999-2010, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, and/or sell copies of the
|
||||
* Software, and to permit persons to whom the Software is furnished to do so,
|
||||
* provided that the above copyright notice(s) and this permission notice appear
|
||||
* in all copies of the Software and that both the above copyright notice(s) and
|
||||
* this permission notice appear in supporting documentation.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
|
||||
* IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE
|
||||
* LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR
|
||||
* ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
|
||||
* IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
|
||||
* OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*
|
||||
* Except as contained in this notice, the name of a copyright holder shall not
|
||||
* be used in advertising or otherwise to promote the sale, use or other
|
||||
* dealings in this Software without prior written authorization of the
|
||||
* copyright holder.
|
||||
*/
|
||||
|
||||
The following license applies to the Snowball stemmers:
|
||||
|
||||
Copyright (c) 2001, Dr Martin Porter
|
||||
Copyright (c) 2002, Richard Boulton
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holders nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
The following license applies to the KStemmer:
|
||||
|
||||
Copyright © 2003,
|
||||
Center for Intelligent Information Retrieval,
|
||||
University of Massachusetts, Amherst.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
3. The names "Center for Intelligent Information Retrieval" and
|
||||
"University of Massachusetts" must not be used to endorse or promote products
|
||||
derived from this software without prior written permission. To obtain
|
||||
permission, contact info@ciir.cs.umass.edu.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF MASSACHUSETTS AND OTHER CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
||||
GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
SUCH DAMAGE.
|
||||
|
||||
The following license applies to the Morfologik project:
|
||||
|
||||
Copyright (c) 2006 Dawid Weiss
|
||||
Copyright (c) 2007-2011 Dawid Weiss, Marcin Miłkowski
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Morfologik nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
---
|
||||
|
||||
The dictionary comes from Morfologik project. Morfologik uses data from
|
||||
Polish ispell/myspell dictionary hosted at http://www.sjp.pl/slownik/en/ and
|
||||
is licenced on the terms of (inter alia) LGPL and Creative Commons
|
||||
ShareAlike. The part-of-speech tags were added in Morfologik project and
|
||||
are not found in the data from sjp.pl. The tagset is similar to IPI PAN
|
||||
tagset.
|
||||
|
||||
---
|
||||
|
||||
The following license applies to the Morfeusz project,
|
||||
used by org.apache.lucene.analysis.morfologik.
|
||||
|
||||
BSD-licensed dictionary of Polish (SGJP)
|
||||
http://sgjp.pl/morfeusz/
|
||||
|
||||
Copyright © 2011 Zygmunt Saloni, Włodzimierz Gruszczyński,
|
||||
Marcin Woliński, Robert Wołosz
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the
|
||||
distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS
|
||||
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
||||
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
|
||||
IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
192
qa/vector/licenses/lucene-NOTICE.txt
Normal file
192
qa/vector/licenses/lucene-NOTICE.txt
Normal file
|
@ -0,0 +1,192 @@
|
|||
Apache Lucene
|
||||
Copyright 2014 The Apache Software Foundation
|
||||
|
||||
This product includes software developed at
|
||||
The Apache Software Foundation (http://www.apache.org/).
|
||||
|
||||
Includes software from other Apache Software Foundation projects,
|
||||
including, but not limited to:
|
||||
- Apache Ant
|
||||
- Apache Jakarta Regexp
|
||||
- Apache Commons
|
||||
- Apache Xerces
|
||||
|
||||
ICU4J, (under analysis/icu) is licensed under an MIT styles license
|
||||
and Copyright (c) 1995-2008 International Business Machines Corporation and others
|
||||
|
||||
Some data files (under analysis/icu/src/data) are derived from Unicode data such
|
||||
as the Unicode Character Database. See http://unicode.org/copyright.html for more
|
||||
details.
|
||||
|
||||
Brics Automaton (under core/src/java/org/apache/lucene/util/automaton) is
|
||||
BSD-licensed, created by Anders Møller. See http://www.brics.dk/automaton/
|
||||
|
||||
The levenshtein automata tables (under core/src/java/org/apache/lucene/util/automaton) were
|
||||
automatically generated with the moman/finenight FSA library, created by
|
||||
Jean-Philippe Barrette-LaPierre. This library is available under an MIT license,
|
||||
see http://sites.google.com/site/rrettesite/moman and
|
||||
http://bitbucket.org/jpbarrette/moman/overview/
|
||||
|
||||
The class org.apache.lucene.util.WeakIdentityMap was derived from
|
||||
the Apache CXF project and is Apache License 2.0.
|
||||
|
||||
The Google Code Prettify is Apache License 2.0.
|
||||
See http://code.google.com/p/google-code-prettify/
|
||||
|
||||
JUnit (junit-4.10) is licensed under the Common Public License v. 1.0
|
||||
See http://junit.sourceforge.net/cpl-v10.html
|
||||
|
||||
This product includes code (JaspellTernarySearchTrie) from Java Spelling Checkin
|
||||
g Package (jaspell): http://jaspell.sourceforge.net/
|
||||
License: The BSD License (http://www.opensource.org/licenses/bsd-license.php)
|
||||
|
||||
The snowball stemmers in
|
||||
analysis/common/src/java/net/sf/snowball
|
||||
were developed by Martin Porter and Richard Boulton.
|
||||
The snowball stopword lists in
|
||||
analysis/common/src/resources/org/apache/lucene/analysis/snowball
|
||||
were developed by Martin Porter and Richard Boulton.
|
||||
The full snowball package is available from
|
||||
http://snowball.tartarus.org/
|
||||
|
||||
The KStem stemmer in
|
||||
analysis/common/src/org/apache/lucene/analysis/en
|
||||
was developed by Bob Krovetz and Sergio Guzman-Lara (CIIR-UMass Amherst)
|
||||
under the BSD-license.
|
||||
|
||||
The Arabic,Persian,Romanian,Bulgarian, Hindi and Bengali analyzers (common) come with a default
|
||||
stopword list that is BSD-licensed created by Jacques Savoy. These files reside in:
|
||||
analysis/common/src/resources/org/apache/lucene/analysis/ar/stopwords.txt,
|
||||
analysis/common/src/resources/org/apache/lucene/analysis/fa/stopwords.txt,
|
||||
analysis/common/src/resources/org/apache/lucene/analysis/ro/stopwords.txt,
|
||||
analysis/common/src/resources/org/apache/lucene/analysis/bg/stopwords.txt,
|
||||
analysis/common/src/resources/org/apache/lucene/analysis/hi/stopwords.txt,
|
||||
analysis/common/src/resources/org/apache/lucene/analysis/bn/stopwords.txt
|
||||
See http://members.unine.ch/jacques.savoy/clef/index.html.
|
||||
|
||||
The German,Spanish,Finnish,French,Hungarian,Italian,Portuguese,Russian and Swedish light stemmers
|
||||
(common) are based on BSD-licensed reference implementations created by Jacques Savoy and
|
||||
Ljiljana Dolamic. These files reside in:
|
||||
analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemmer.java
|
||||
analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemmer.java
|
||||
analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemmer.java
|
||||
analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemmer.java
|
||||
analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemmer.java
|
||||
analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemmer.java
|
||||
analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemmer.java
|
||||
analysis/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemmer.java
|
||||
analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemmer.java
|
||||
analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemmer.java
|
||||
analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemmer.java
|
||||
|
||||
The Stempel analyzer (stempel) includes BSD-licensed software developed
|
||||
by the Egothor project http://egothor.sf.net/, created by Leo Galambos, Martin Kvapil,
|
||||
and Edmond Nolan.
|
||||
|
||||
The Polish analyzer (stempel) comes with a default
|
||||
stopword list that is BSD-licensed created by the Carrot2 project. The file resides
|
||||
in stempel/src/resources/org/apache/lucene/analysis/pl/stopwords.txt.
|
||||
See http://project.carrot2.org/license.html.
|
||||
|
||||
The SmartChineseAnalyzer source code (smartcn) was
|
||||
provided by Xiaoping Gao and copyright 2009 by www.imdict.net.
|
||||
|
||||
WordBreakTestUnicode_*.java (under modules/analysis/common/src/test/)
|
||||
is derived from Unicode data such as the Unicode Character Database.
|
||||
See http://unicode.org/copyright.html for more details.
|
||||
|
||||
The Morfologik analyzer (morfologik) includes BSD-licensed software
|
||||
developed by Dawid Weiss and Marcin Miłkowski (http://morfologik.blogspot.com/).
|
||||
|
||||
Morfologik uses data from Polish ispell/myspell dictionary
|
||||
(http://www.sjp.pl/slownik/en/) licenced on the terms of (inter alia)
|
||||
LGPL and Creative Commons ShareAlike.
|
||||
|
||||
Morfologic includes data from BSD-licensed dictionary of Polish (SGJP)
|
||||
(http://sgjp.pl/morfeusz/)
|
||||
|
||||
Servlet-api.jar and javax.servlet-*.jar are under the CDDL license, the original
|
||||
source code for this can be found at http://www.eclipse.org/jetty/downloads.php
|
||||
|
||||
===========================================================================
|
||||
Kuromoji Japanese Morphological Analyzer - Apache Lucene Integration
|
||||
===========================================================================
|
||||
|
||||
This software includes a binary and/or source version of data from
|
||||
|
||||
mecab-ipadic-2.7.0-20070801
|
||||
|
||||
which can be obtained from
|
||||
|
||||
http://atilika.com/releases/mecab-ipadic/mecab-ipadic-2.7.0-20070801.tar.gz
|
||||
|
||||
or
|
||||
|
||||
http://jaist.dl.sourceforge.net/project/mecab/mecab-ipadic/2.7.0-20070801/mecab-ipadic-2.7.0-20070801.tar.gz
|
||||
|
||||
===========================================================================
|
||||
mecab-ipadic-2.7.0-20070801 Notice
|
||||
===========================================================================
|
||||
|
||||
Nara Institute of Science and Technology (NAIST),
|
||||
the copyright holders, disclaims all warranties with regard to this
|
||||
software, including all implied warranties of merchantability and
|
||||
fitness, in no event shall NAIST be liable for
|
||||
any special, indirect or consequential damages or any damages
|
||||
whatsoever resulting from loss of use, data or profits, whether in an
|
||||
action of contract, negligence or other tortuous action, arising out
|
||||
of or in connection with the use or performance of this software.
|
||||
|
||||
A large portion of the dictionary entries
|
||||
originate from ICOT Free Software. The following conditions for ICOT
|
||||
Free Software applies to the current dictionary as well.
|
||||
|
||||
Each User may also freely distribute the Program, whether in its
|
||||
original form or modified, to any third party or parties, PROVIDED
|
||||
that the provisions of Section 3 ("NO WARRANTY") will ALWAYS appear
|
||||
on, or be attached to, the Program, which is distributed substantially
|
||||
in the same form as set out herein and that such intended
|
||||
distribution, if actually made, will neither violate or otherwise
|
||||
contravene any of the laws and regulations of the countries having
|
||||
jurisdiction over the User or the intended distribution itself.
|
||||
|
||||
NO WARRANTY
|
||||
|
||||
The program was produced on an experimental basis in the course of the
|
||||
research and development conducted during the project and is provided
|
||||
to users as so produced on an experimental basis. Accordingly, the
|
||||
program is provided without any warranty whatsoever, whether express,
|
||||
implied, statutory or otherwise. The term "warranty" used herein
|
||||
includes, but is not limited to, any warranty of the quality,
|
||||
performance, merchantability and fitness for a particular purpose of
|
||||
the program and the nonexistence of any infringement or violation of
|
||||
any right of any third party.
|
||||
|
||||
Each user of the program will agree and understand, and be deemed to
|
||||
have agreed and understood, that there is no warranty whatsoever for
|
||||
the program and, accordingly, the entire risk arising from or
|
||||
otherwise connected with the program is assumed by the user.
|
||||
|
||||
Therefore, neither ICOT, the copyright holder, or any other
|
||||
organization that participated in or was otherwise related to the
|
||||
development of the program and their respective officials, directors,
|
||||
officers and other employees shall be held liable for any and all
|
||||
damages, including, without limitation, general, special, incidental
|
||||
and consequential damages, arising out of or otherwise in connection
|
||||
with the use or inability to use the program or any product, material
|
||||
or result produced or otherwise obtained by using the program,
|
||||
regardless of whether they have been advised of, or otherwise had
|
||||
knowledge of, the possibility of such damages at any time during the
|
||||
project or thereafter. Each user will be deemed to have agreed to the
|
||||
foregoing by his or her commencement of use of the program. The term
|
||||
"use" as used herein includes, but is not limited to, the use,
|
||||
modification, copying and distribution of the program and the
|
||||
production of secondary products from the program.
|
||||
|
||||
In the case where the program, whether in its original form or
|
||||
modified, was distributed or delivered to or received by a user from
|
||||
any person, organization or entity other than ICOT, unless it makes or
|
||||
grants independently of ICOT any specific warranty to the user in
|
||||
writing, such person, organization or entity, will also be exempted
|
||||
from and not be held liable to the user for any such damages as noted
|
||||
above as far as the program is concerned.
|
20
qa/vector/src/main/java/module-info.java
Normal file
20
qa/vector/src/main/java/module-info.java
Normal file
|
@ -0,0 +1,20 @@
|
|||
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the "Elastic License
 * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
 * Public License v 1"; you may not use this file except in compliance with, at
 * your election, the "Elastic License 2.0", the "GNU Affero General Public
 * License v3.0 only", or the "Server Side Public License, v 1".
 */

// Module descriptor for the standalone KNN index tester tool (qa/vector).
module org.elasticsearch.test.knn {
    requires org.elasticsearch.base;
    requires org.elasticsearch.server;
    requires org.elasticsearch.xcontent;
    requires org.apache.lucene.core;
    requires org.apache.lucene.codecs;
    requires org.apache.lucene.queries;
    requires org.elasticsearch.logging;
    // java.management + jdk.management: com.sun.management.ThreadMXBean is used
    // elsewhere in this module for per-thread CPU-time measurement.
    requires java.management;
    requires jdk.management;
}
|
|
@ -0,0 +1,292 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the "Elastic License
|
||||
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
|
||||
* Public License v 1"; you may not use this file except in compliance with, at
|
||||
* your election, the "Elastic License 2.0", the "GNU Affero General Public
|
||||
* License v3.0 only", or the "Server Side Public License, v 1".
|
||||
*/
|
||||
|
||||
package org.elasticsearch.test.knn;
|
||||
|
||||
import org.apache.lucene.index.VectorEncoding;
|
||||
import org.apache.lucene.index.VectorSimilarityFunction;
|
||||
import org.elasticsearch.common.Strings;
|
||||
import org.elasticsearch.core.PathUtils;
|
||||
import org.elasticsearch.xcontent.ObjectParser;
|
||||
import org.elasticsearch.xcontent.ParseField;
|
||||
import org.elasticsearch.xcontent.ToXContentObject;
|
||||
import org.elasticsearch.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.xcontent.XContentParser;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Locale;
|
||||
|
||||
/**
 * Command line arguments for the KNN index tester.
 * This class encapsulates all the parameters required to run the KNN index tests.
 *
 * <p>Instances are parsed from a JSON configuration object via
 * {@link #fromXContent(XContentParser)}; any field absent from the JSON keeps the
 * default declared in {@link Builder}. {@code doc_vectors} and {@code dimensions}
 * are the only mandatory fields (enforced in {@link Builder#build()}).
 */
record CmdLineArgs(
    Path docVectors,
    Path queryVectors,
    int numDocs,
    int numQueries,
    KnnIndexTester.IndexType indexType,
    int numCandidates,
    int k,
    int nProbe,
    int ivfClusterSize,
    int overSamplingFactor,
    int hnswM,
    int hnswEfConstruction,
    int searchThreads,
    int indexThreads,
    boolean reindex,
    boolean forceMerge,
    VectorSimilarityFunction vectorSpace,
    int quantizeBits,
    VectorEncoding vectorEncoding,
    int dimensions
) implements ToXContentObject {

    // JSON field names accepted in the config file — one ParseField per record component.
    static final ParseField DOC_VECTORS_FIELD = new ParseField("doc_vectors");
    static final ParseField QUERY_VECTORS_FIELD = new ParseField("query_vectors");
    static final ParseField NUM_DOCS_FIELD = new ParseField("num_docs");
    static final ParseField NUM_QUERIES_FIELD = new ParseField("num_queries");
    static final ParseField INDEX_TYPE_FIELD = new ParseField("index_type");
    static final ParseField NUM_CANDIDATES_FIELD = new ParseField("num_candidates");
    static final ParseField K_FIELD = new ParseField("k");
    static final ParseField N_PROBE_FIELD = new ParseField("n_probe");
    static final ParseField IVF_CLUSTER_SIZE_FIELD = new ParseField("ivf_cluster_size");
    static final ParseField OVER_SAMPLING_FACTOR_FIELD = new ParseField("over_sampling_factor");
    static final ParseField HNSW_M_FIELD = new ParseField("hnsw_m");
    static final ParseField HNSW_EF_CONSTRUCTION_FIELD = new ParseField("hnsw_ef_construction");
    static final ParseField SEARCH_THREADS_FIELD = new ParseField("search_threads");
    static final ParseField INDEX_THREADS_FIELD = new ParseField("index_threads");
    static final ParseField REINDEX_FIELD = new ParseField("reindex");
    static final ParseField FORCE_MERGE_FIELD = new ParseField("force_merge");
    static final ParseField VECTOR_SPACE_FIELD = new ParseField("vector_space");
    static final ParseField QUANTIZE_BITS_FIELD = new ParseField("quantize_bits");
    static final ParseField VECTOR_ENCODING_FIELD = new ParseField("vector_encoding");
    static final ParseField DIMENSIONS_FIELD = new ParseField("dimensions");

    /**
     * Parses one JSON config object (the parser must be positioned on its
     * START_OBJECT token) into a validated {@link CmdLineArgs}.
     *
     * @throws IOException on malformed JSON
     * @throws IllegalArgumentException if mandatory fields are missing/invalid (from {@link Builder#build()})
     */
    static CmdLineArgs fromXContent(XContentParser parser) throws IOException {
        Builder builder = PARSER.apply(parser, null);
        return builder.build();
    }

    // Lenient parser (second arg true): unknown JSON fields are ignored rather than rejected.
    static final ObjectParser<CmdLineArgs.Builder, Void> PARSER = new ObjectParser<>("cmd_line_args", true, Builder::new);

    static {
        PARSER.declareString(Builder::setDocVectors, DOC_VECTORS_FIELD);
        PARSER.declareString(Builder::setQueryVectors, QUERY_VECTORS_FIELD);
        PARSER.declareInt(Builder::setNumDocs, NUM_DOCS_FIELD);
        PARSER.declareInt(Builder::setNumQueries, NUM_QUERIES_FIELD);
        PARSER.declareString(Builder::setIndexType, INDEX_TYPE_FIELD);
        PARSER.declareInt(Builder::setNumCandidates, NUM_CANDIDATES_FIELD);
        PARSER.declareInt(Builder::setK, K_FIELD);
        PARSER.declareInt(Builder::setNProbe, N_PROBE_FIELD);
        PARSER.declareInt(Builder::setIvfClusterSize, IVF_CLUSTER_SIZE_FIELD);
        PARSER.declareInt(Builder::setOverSamplingFactor, OVER_SAMPLING_FACTOR_FIELD);
        PARSER.declareInt(Builder::setHnswM, HNSW_M_FIELD);
        PARSER.declareInt(Builder::setHnswEfConstruction, HNSW_EF_CONSTRUCTION_FIELD);
        PARSER.declareInt(Builder::setSearchThreads, SEARCH_THREADS_FIELD);
        PARSER.declareInt(Builder::setIndexThreads, INDEX_THREADS_FIELD);
        PARSER.declareBoolean(Builder::setReindex, REINDEX_FIELD);
        PARSER.declareBoolean(Builder::setForceMerge, FORCE_MERGE_FIELD);
        PARSER.declareString(Builder::setVectorSpace, VECTOR_SPACE_FIELD);
        PARSER.declareInt(Builder::setQuantizeBits, QUANTIZE_BITS_FIELD);
        PARSER.declareString(Builder::setVectorEncoding, VECTOR_ENCODING_FIELD);
        PARSER.declareInt(Builder::setDimensions, DIMENSIONS_FIELD);
    }

    /**
     * Serializes this argument set back to JSON using the same field names the
     * parser accepts; enum values are emitted lower-cased (the inverse of the
     * upper-casing applied by the Builder's string setters).
     */
    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        builder.startObject();
        if (docVectors != null) {
            builder.field(DOC_VECTORS_FIELD.getPreferredName(), docVectors.toString());
        }
        if (queryVectors != null) {
            builder.field(QUERY_VECTORS_FIELD.getPreferredName(), queryVectors.toString());
        }
        builder.field(NUM_DOCS_FIELD.getPreferredName(), numDocs);
        builder.field(NUM_QUERIES_FIELD.getPreferredName(), numQueries);
        builder.field(INDEX_TYPE_FIELD.getPreferredName(), indexType.name().toLowerCase(Locale.ROOT));
        builder.field(NUM_CANDIDATES_FIELD.getPreferredName(), numCandidates);
        builder.field(K_FIELD.getPreferredName(), k);
        builder.field(N_PROBE_FIELD.getPreferredName(), nProbe);
        builder.field(IVF_CLUSTER_SIZE_FIELD.getPreferredName(), ivfClusterSize);
        builder.field(OVER_SAMPLING_FACTOR_FIELD.getPreferredName(), overSamplingFactor);
        builder.field(HNSW_M_FIELD.getPreferredName(), hnswM);
        builder.field(HNSW_EF_CONSTRUCTION_FIELD.getPreferredName(), hnswEfConstruction);
        builder.field(SEARCH_THREADS_FIELD.getPreferredName(), searchThreads);
        builder.field(INDEX_THREADS_FIELD.getPreferredName(), indexThreads);
        builder.field(REINDEX_FIELD.getPreferredName(), reindex);
        builder.field(FORCE_MERGE_FIELD.getPreferredName(), forceMerge);
        builder.field(VECTOR_SPACE_FIELD.getPreferredName(), vectorSpace.name().toLowerCase(Locale.ROOT));
        builder.field(QUANTIZE_BITS_FIELD.getPreferredName(), quantizeBits);
        builder.field(VECTOR_ENCODING_FIELD.getPreferredName(), vectorEncoding.name().toLowerCase(Locale.ROOT));
        builder.field(DIMENSIONS_FIELD.getPreferredName(), dimensions);
        return builder.endObject();
    }

    @Override
    public String toString() {
        // Single-line, non-pretty JSON rendering of this argument set.
        return Strings.toString(this, false, false);
    }

    /**
     * Mutable builder holding the defaults for every optional field; the string
     * setters for paths and enums convert from the raw JSON string form.
     */
    static class Builder {
        private Path docVectors;                // mandatory — no default
        private Path queryVectors;              // optional — null means "skip the search phase"
        private int numDocs = 1000;
        private int numQueries = 100;
        private KnnIndexTester.IndexType indexType = KnnIndexTester.IndexType.HNSW;
        private int numCandidates = 1000;
        private int k = 10;
        private int nProbe = 10;                // IVF-only search parameter
        private int ivfClusterSize = 1000;      // IVF-only index parameter
        private int overSamplingFactor = 1;
        private int hnswM = 16;
        private int hnswEfConstruction = 200;
        private int searchThreads = 1;
        private int indexThreads = 1;
        private boolean reindex = false;
        private boolean forceMerge = false;
        private VectorSimilarityFunction vectorSpace = VectorSimilarityFunction.EUCLIDEAN;
        private int quantizeBits = 8;           // 32 disables quantization, 1 selects binary quantization
        private VectorEncoding vectorEncoding = VectorEncoding.FLOAT32;
        private int dimensions;                 // mandatory — must be set to a positive value

        public Builder setDocVectors(String docVectors) {
            this.docVectors = PathUtils.get(docVectors);
            return this;
        }

        public Builder setQueryVectors(String queryVectors) {
            this.queryVectors = PathUtils.get(queryVectors);
            return this;
        }

        public Builder setNumDocs(int numDocs) {
            this.numDocs = numDocs;
            return this;
        }

        public Builder setNumQueries(int numQueries) {
            this.numQueries = numQueries;
            return this;
        }

        public Builder setIndexType(String indexType) {
            // Accepts any case in JSON ("hnsw", "HNSW", ...); throws IllegalArgumentException for unknown types.
            this.indexType = KnnIndexTester.IndexType.valueOf(indexType.toUpperCase(Locale.ROOT));
            return this;
        }

        public Builder setNumCandidates(int numCandidates) {
            this.numCandidates = numCandidates;
            return this;
        }

        public Builder setK(int k) {
            this.k = k;
            return this;
        }

        public Builder setNProbe(int nProbe) {
            this.nProbe = nProbe;
            return this;
        }

        public Builder setIvfClusterSize(int ivfClusterSize) {
            this.ivfClusterSize = ivfClusterSize;
            return this;
        }

        public Builder setOverSamplingFactor(int overSamplingFactor) {
            this.overSamplingFactor = overSamplingFactor;
            return this;
        }

        public Builder setHnswM(int hnswM) {
            this.hnswM = hnswM;
            return this;
        }

        public Builder setHnswEfConstruction(int hnswEfConstruction) {
            this.hnswEfConstruction = hnswEfConstruction;
            return this;
        }

        public Builder setSearchThreads(int searchThreads) {
            this.searchThreads = searchThreads;
            return this;
        }

        public Builder setIndexThreads(int indexThreads) {
            this.indexThreads = indexThreads;
            return this;
        }

        public Builder setReindex(boolean reindex) {
            this.reindex = reindex;
            return this;
        }

        public Builder setForceMerge(boolean forceMerge) {
            this.forceMerge = forceMerge;
            return this;
        }

        public Builder setVectorSpace(String vectorSpace) {
            // Case-insensitive mapping onto Lucene's VectorSimilarityFunction constants.
            this.vectorSpace = VectorSimilarityFunction.valueOf(vectorSpace.toUpperCase(Locale.ROOT));
            return this;
        }

        public Builder setQuantizeBits(int quantizeBits) {
            this.quantizeBits = quantizeBits;
            return this;
        }

        public Builder setVectorEncoding(String vectorEncoding) {
            this.vectorEncoding = VectorEncoding.valueOf(vectorEncoding.toUpperCase(Locale.ROOT));
            return this;
        }

        public Builder setDimensions(int dimensions) {
            this.dimensions = dimensions;
            return this;
        }

        /**
         * Validates the mandatory fields and materializes the immutable record.
         *
         * @throws IllegalArgumentException if {@code doc_vectors} was never set or
         *         {@code dimensions} is not a positive integer
         */
        public CmdLineArgs build() {
            if (docVectors == null) {
                throw new IllegalArgumentException("Document vectors path must be provided");
            }
            if (dimensions <= 0) {
                throw new IllegalArgumentException("dimensions must be a positive integer");
            }
            return new CmdLineArgs(
                docVectors,
                queryVectors,
                numDocs,
                numQueries,
                indexType,
                numCandidates,
                k,
                nProbe,
                ivfClusterSize,
                overSamplingFactor,
                hnswM,
                hnswEfConstruction,
                searchThreads,
                indexThreads,
                reindex,
                forceMerge,
                vectorSpace,
                quantizeBits,
                vectorEncoding,
                dimensions
            );
        }
    }
}
|
|
@ -0,0 +1,399 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the "Elastic License
|
||||
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
|
||||
* Public License v 1"; you may not use this file except in compliance with, at
|
||||
* your election, the "Elastic License 2.0", the "GNU Affero General Public
|
||||
* License v3.0 only", or the "Server Side Public License, v 1".
|
||||
*/
|
||||
|
||||
package org.elasticsearch.test.knn;
|
||||
|
||||
import com.sun.management.ThreadMXBean;
|
||||
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||
import org.apache.lucene.codecs.lucene101.Lucene101Codec;
|
||||
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
|
||||
import org.elasticsearch.common.Strings;
|
||||
import org.elasticsearch.common.logging.LogConfigurator;
|
||||
import org.elasticsearch.core.PathUtils;
|
||||
import org.elasticsearch.index.codec.vectors.ES813Int8FlatVectorFormat;
|
||||
import org.elasticsearch.index.codec.vectors.ES814HnswScalarQuantizedVectorsFormat;
|
||||
import org.elasticsearch.index.codec.vectors.IVFVectorsFormat;
|
||||
import org.elasticsearch.index.codec.vectors.es818.ES818BinaryQuantizedVectorsFormat;
|
||||
import org.elasticsearch.index.codec.vectors.es818.ES818HnswBinaryQuantizedVectorsFormat;
|
||||
import org.elasticsearch.logging.Level;
|
||||
import org.elasticsearch.xcontent.XContentParser;
|
||||
import org.elasticsearch.xcontent.XContentParserConfiguration;
|
||||
import org.elasticsearch.xcontent.XContentType;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.lang.management.ThreadInfo;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
|
||||
/**
|
||||
* A utility class to create and test KNN indices using Lucene.
|
||||
* It supports various index types (HNSW, FLAT, IVF) and configurations.
|
||||
*/
|
||||
public class KnnIndexTester {
|
||||
// Log verbosity used by the tool's SysOutLogger.
static final Level LOG_LEVEL = Level.DEBUG;

// Simple stdout-backed logger shared by the tester components.
static final SysOutLogger logger = new SysOutLogger();

static {
    LogConfigurator.loadLog4jPlugins();
    LogConfigurator.configureESLogging(); // native access requires logging to be initialized
}

// Root directory under which per-configuration index directories are created;
// relative to the working directory the tool is launched from.
static final String INDEX_DIR = "target/knn_index";

// Supported index layouts selectable via the "index_type" config field.
enum IndexType {
    HNSW,
    FLAT,
    IVF
}
|
||||
|
||||
private static String formatIndexPath(CmdLineArgs args) {
|
||||
List<String> suffix = new ArrayList<>();
|
||||
if (args.indexType() == IndexType.FLAT) {
|
||||
suffix.add("flat");
|
||||
} else if (args.indexType() == IndexType.IVF) {
|
||||
suffix.add("ivf");
|
||||
suffix.add(Integer.toString(args.ivfClusterSize()));
|
||||
} else {
|
||||
suffix.add(Integer.toString(args.hnswM()));
|
||||
suffix.add(Integer.toString(args.hnswEfConstruction()));
|
||||
if (args.quantizeBits() < 32) {
|
||||
suffix.add(Integer.toString(args.quantizeBits()));
|
||||
}
|
||||
}
|
||||
return INDEX_DIR + "/" + args.docVectors().getFileName() + "-" + String.join("-", suffix) + ".index";
|
||||
}
|
||||
|
||||
static Codec createCodec(CmdLineArgs args) {
|
||||
final KnnVectorsFormat format;
|
||||
if (args.indexType() == IndexType.IVF) {
|
||||
format = new IVFVectorsFormat(args.ivfClusterSize());
|
||||
} else {
|
||||
if (args.quantizeBits() == 1) {
|
||||
if (args.indexType() == IndexType.FLAT) {
|
||||
format = new ES818BinaryQuantizedVectorsFormat();
|
||||
} else {
|
||||
format = new ES818HnswBinaryQuantizedVectorsFormat(args.hnswM(), args.hnswEfConstruction(), 1, null);
|
||||
}
|
||||
} else if (args.quantizeBits() < 32) {
|
||||
if (args.indexType() == IndexType.FLAT) {
|
||||
format = new ES813Int8FlatVectorFormat(null, args.quantizeBits(), true);
|
||||
} else {
|
||||
format = new ES814HnswScalarQuantizedVectorsFormat(
|
||||
args.hnswM(),
|
||||
args.hnswEfConstruction(),
|
||||
null,
|
||||
args.quantizeBits(),
|
||||
true
|
||||
);
|
||||
}
|
||||
} else {
|
||||
format = new Lucene99HnswVectorsFormat(args.hnswM(), args.hnswEfConstruction(), 1, null);
|
||||
}
|
||||
}
|
||||
return new Lucene101Codec() {
|
||||
@Override
|
||||
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
|
||||
return format;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Main method to run the KNN index tester.
 * It parses command line arguments, creates the index, and runs searches if specified.
 *
 * <p>The single argument is the path to a JSON config file containing either one
 * configuration object or an array of them; each configuration is run in sequence
 * and the collected results are printed as a table at the end.
 *
 * @param args Command line arguments
 * @throws Exception If an error occurs during index creation or search
 */
public static void main(String[] args) throws Exception {
    // No arg / --help / -h: print usage plus a pretty-printed example config and exit.
    if (args.length != 1 || args[0].equals("--help") || args[0].equals("-h")) {
        // printout an example configuration formatted file and indicate that it is required
        System.out.println("Usage: java -cp <your-classpath> org.elasticsearch.test.knn.KnnIndexTester <config-file>");
        System.out.println("Where <config-file> is a JSON file containing one or more configurations for the KNN index tester.");
        System.out.println("An example configuration object: ");
        System.out.println(
            Strings.toString(
                new CmdLineArgs.Builder().setDimensions(64)
                    .setDocVectors("/doc/vectors/path")
                    .setQueryVectors("/query/vectors/path")
                    .build(),
                true,
                true
            )
        );
        return;
    }
    String jsonConfig = args[0];
    // Parse command line arguments
    Path jsonConfigPath = PathUtils.get(jsonConfig);
    if (Files.exists(jsonConfigPath) == false) {
        throw new IllegalArgumentException("JSON config file does not exist: " + jsonConfigPath);
    }
    // Parse the JSON config file to get command line arguments
    // This assumes that CmdLineArgs.fromXContent is implemented to parse the JSON file
    List<CmdLineArgs> cmdLineArgsList = new ArrayList<>();
    try (
        InputStream jsonStream = Files.newInputStream(jsonConfigPath);
        XContentParser parser = XContentType.JSON.xContent().createParser(XContentParserConfiguration.EMPTY, jsonStream)
    ) {
        // check if the parser is at the start of an object if so, we only have one set of arguments
        if (parser.currentToken() == null && parser.nextToken() == XContentParser.Token.START_OBJECT) {
            cmdLineArgsList.add(CmdLineArgs.fromXContent(parser));
        } else if (parser.currentToken() == XContentParser.Token.START_ARRAY) {
            // if the parser is at the start of an array, we have multiple sets of arguments
            while (parser.nextToken() != XContentParser.Token.END_ARRAY) {
                cmdLineArgsList.add(CmdLineArgs.fromXContent(parser));
            }
        } else {
            throw new IllegalArgumentException("Invalid JSON format in config file: " + jsonConfigPath);
        }
    }
    FormattedResults formattedResults = new FormattedResults();
    for (CmdLineArgs cmdLineArgs : cmdLineArgsList) {
        Results result = new Results(cmdLineArgs.indexType().name().toLowerCase(Locale.ROOT), cmdLineArgs.numDocs());
        System.out.println("Running KNN index tester with arguments: " + cmdLineArgs);
        Codec codec = createCodec(cmdLineArgs);
        // Index path is derived from the doc file + index-shaping params, so later
        // configs that differ only in search params can reuse the same index.
        Path indexPath = PathUtils.get(formatIndexPath(cmdLineArgs));
        if (cmdLineArgs.reindex() || cmdLineArgs.forceMerge()) {
            KnnIndexer knnIndexer = new KnnIndexer(
                cmdLineArgs.docVectors(),
                indexPath,
                codec,
                cmdLineArgs.indexThreads(),
                cmdLineArgs.vectorEncoding(),
                cmdLineArgs.dimensions(),
                cmdLineArgs.vectorSpace(),
                cmdLineArgs.numDocs()
            );
            if (Files.exists(indexPath) == false) {
                if (cmdLineArgs.reindex() == false) {
                    throw new IllegalArgumentException("Index path does not exist: " + indexPath);
                }
                // NOTE(review): at this point reindex is necessarily true (the check above
                // already threw otherwise), so this rejects reindex=true + force_merge=true
                // on a fresh path even though the reindex below would create the index first.
                // Looks possibly unintended — confirm desired semantics.
                if (cmdLineArgs.forceMerge()) {
                    throw new IllegalArgumentException("Force merging without an existing index in: " + indexPath);
                }
            }
            if (cmdLineArgs.reindex()) {
                knnIndexer.createIndex(result);
            }
            // forceMerge reports segment count itself; otherwise record it explicitly.
            if (cmdLineArgs.forceMerge()) {
                knnIndexer.forceMerge(result);
            } else {
                knnIndexer.numSegments(result);
            }
        }
        // Search phase is optional: skipped entirely when no query vectors are configured.
        if (cmdLineArgs.queryVectors() != null) {
            KnnSearcher knnSearcher = new KnnSearcher(indexPath, cmdLineArgs);
            knnSearcher.runSearch(result);
        }
        formattedResults.results.add(result);
    }
    System.out.println("Results:");
    System.out.println(formattedResults);
}
|
||||
|
||||
static class FormattedResults {
|
||||
List<Results> results = new ArrayList<>();
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
if (results.isEmpty()) {
|
||||
return "No results available.";
|
||||
}
|
||||
|
||||
// Define column headers
|
||||
String[] headers = {
|
||||
"index_type",
|
||||
"num_docs",
|
||||
"index_time(ms)",
|
||||
"force_merge_time(ms)",
|
||||
"num_segments",
|
||||
"latency(ms)",
|
||||
"net_cpu_time(ms)",
|
||||
"avg_cpu_count",
|
||||
"QPS",
|
||||
"recall",
|
||||
"visited" };
|
||||
|
||||
// Calculate appropriate column widths based on headers and data
|
||||
int[] widths = calculateColumnWidths(headers);
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
// Format and append header
|
||||
sb.append(formatRow(headers, widths));
|
||||
sb.append("\n");
|
||||
|
||||
// Add separator line
|
||||
for (int width : widths) {
|
||||
sb.append("-".repeat(width)).append(" ");
|
||||
}
|
||||
sb.append("\n");
|
||||
|
||||
// Format and append each row of data
|
||||
for (Results result : results) {
|
||||
String[] rowData = {
|
||||
result.indexType,
|
||||
Integer.toString(result.numDocs),
|
||||
Long.toString(result.indexTimeMS),
|
||||
Long.toString(result.forceMergeTimeMS),
|
||||
Integer.toString(result.numSegments),
|
||||
String.format(Locale.ROOT, "%.2f", result.avgLatency),
|
||||
String.format(Locale.ROOT, "%.2f", result.netCpuTimeMS),
|
||||
String.format(Locale.ROOT, "%.2f", result.avgCpuCount),
|
||||
String.format(Locale.ROOT, "%.2f", result.qps),
|
||||
String.format(Locale.ROOT, "%.2f", result.avgRecall),
|
||||
String.format(Locale.ROOT, "%.2f", result.averageVisited) };
|
||||
sb.append(formatRow(rowData, widths));
|
||||
sb.append("\n");
|
||||
}
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
// Helper method to format a single row with proper column widths
|
||||
private String formatRow(String[] values, int[] widths) {
|
||||
StringBuilder row = new StringBuilder();
|
||||
for (int i = 0; i < values.length; i++) {
|
||||
// Left-align text column (index_type), right-align numeric columns
|
||||
String format = (i == 0) ? "%-" + widths[i] + "s" : "%" + widths[i] + "s";
|
||||
row.append(Strings.format(format, values[i]));
|
||||
|
||||
// Add separation between columns
|
||||
if (i < values.length - 1) {
|
||||
row.append(" ");
|
||||
}
|
||||
}
|
||||
return row.toString();
|
||||
}
|
||||
|
||||
// Calculate appropriate column widths based on headers and data
|
||||
private int[] calculateColumnWidths(String[] headers) {
|
||||
int[] widths = new int[headers.length];
|
||||
|
||||
// Initialize widths with header lengths
|
||||
for (int i = 0; i < headers.length; i++) {
|
||||
widths[i] = headers[i].length();
|
||||
}
|
||||
|
||||
// Update widths based on data
|
||||
for (Results result : results) {
|
||||
String[] values = {
|
||||
result.indexType,
|
||||
Integer.toString(result.numDocs),
|
||||
Long.toString(result.indexTimeMS),
|
||||
Long.toString(result.forceMergeTimeMS),
|
||||
Integer.toString(result.numSegments),
|
||||
String.format(Locale.ROOT, "%.2f", result.avgLatency),
|
||||
String.format(Locale.ROOT, "%.2f", result.netCpuTimeMS),
|
||||
String.format(Locale.ROOT, "%.2f", result.avgCpuCount),
|
||||
String.format(Locale.ROOT, "%.2f", result.qps),
|
||||
String.format(Locale.ROOT, "%.2f", result.avgRecall),
|
||||
String.format(Locale.ROOT, "%.2f", result.averageVisited) };
|
||||
|
||||
for (int i = 0; i < values.length; i++) {
|
||||
widths[i] = Math.max(widths[i], values[i].length());
|
||||
}
|
||||
}
|
||||
|
||||
return widths;
|
||||
}
|
||||
}
|
||||
|
||||
static class Results {
|
||||
final String indexType;
|
||||
final int numDocs;
|
||||
long indexTimeMS;
|
||||
long forceMergeTimeMS;
|
||||
int numSegments;
|
||||
double avgLatency;
|
||||
double qps;
|
||||
double avgRecall;
|
||||
double averageVisited;
|
||||
double netCpuTimeMS;
|
||||
double avgCpuCount;
|
||||
|
||||
Results(String indexType, int numDocs) {
|
||||
this.indexType = indexType;
|
||||
this.numDocs = numDocs;
|
||||
}
|
||||
}
|
||||
|
||||
static final class SysOutLogger {
|
||||
|
||||
void warn(String message) {
|
||||
if (LOG_LEVEL.ordinal() >= Level.WARN.ordinal()) {
|
||||
System.out.println(message);
|
||||
}
|
||||
}
|
||||
|
||||
void warn(String message, Object... params) {
|
||||
if (LOG_LEVEL.ordinal() >= Level.WARN.ordinal()) {
|
||||
System.out.println(String.format(Locale.ROOT, message, params));
|
||||
}
|
||||
}
|
||||
|
||||
void info(String message) {
|
||||
if (LOG_LEVEL.ordinal() >= Level.INFO.ordinal()) {
|
||||
System.out.println(message);
|
||||
}
|
||||
}
|
||||
|
||||
void info(String message, Object... params) {
|
||||
if (LOG_LEVEL.ordinal() >= Level.INFO.ordinal()) {
|
||||
System.out.println(String.format(Locale.ROOT, message, params));
|
||||
}
|
||||
}
|
||||
|
||||
void debug(String message) {
|
||||
if (LOG_LEVEL.ordinal() >= Level.DEBUG.ordinal()) {
|
||||
System.out.println(message);
|
||||
}
|
||||
}
|
||||
|
||||
void debug(String message, Object... params) {
|
||||
if (LOG_LEVEL.ordinal() >= Level.DEBUG.ordinal()) {
|
||||
System.out.println(String.format(Locale.ROOT, message, params));
|
||||
}
|
||||
}
|
||||
|
||||
void trace(String message) {
|
||||
if (LOG_LEVEL == Level.TRACE) {
|
||||
System.out.println(message);
|
||||
}
|
||||
}
|
||||
|
||||
void trace(String message, Object... params) {
|
||||
if (LOG_LEVEL == Level.TRACE) {
|
||||
System.out.println(String.format(Locale.ROOT, message, params));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static final class ThreadDetails {
|
||||
private static final ThreadMXBean threadBean = (ThreadMXBean) java.lang.management.ManagementFactory.getThreadMXBean();
|
||||
public final long[] threadIDs;
|
||||
public final long[] cpuTimesNS;
|
||||
public final ThreadInfo[] threadInfos;
|
||||
public final long ns;
|
||||
|
||||
ThreadDetails() {
|
||||
ns = System.nanoTime();
|
||||
threadIDs = threadBean.getAllThreadIds();
|
||||
cpuTimesNS = threadBean.getThreadCpuTime(threadIDs);
|
||||
threadInfos = threadBean.getThreadInfo(threadIDs);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,329 @@
|
|||
/*
|
||||
* @notice
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
* a copy and modification from Lucene util
|
||||
* Modifications copyright (C) 2025 Elasticsearch B.V.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.test.knn;
|
||||
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.KnnByteVectorField;
|
||||
import org.apache.lucene.document.KnnFloatVectorField;
|
||||
import org.apache.lucene.document.StoredField;
|
||||
import org.apache.lucene.index.ConcurrentMergeScheduler;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.VectorEncoding;
|
||||
import org.apache.lucene.index.VectorSimilarityFunction;
|
||||
import org.apache.lucene.store.FSDirectory;
|
||||
import org.apache.lucene.util.PrintStreamInfoStream;
|
||||
import org.elasticsearch.common.io.Channels;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.UncheckedIOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.Future;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import static org.elasticsearch.test.knn.KnnIndexTester.logger;
|
||||
|
||||
class KnnIndexer {
|
||||
private static final double WRITER_BUFFER_MB = 128;
|
||||
static final String ID_FIELD = "id";
|
||||
static final String VECTOR_FIELD = "vector";
|
||||
|
||||
private final Path docsPath;
|
||||
private final Path indexPath;
|
||||
private final VectorEncoding vectorEncoding;
|
||||
private final int dim;
|
||||
private final VectorSimilarityFunction similarityFunction;
|
||||
private final Codec codec;
|
||||
private final int numDocs;
|
||||
private final int numIndexThreads;
|
||||
|
||||
KnnIndexer(
|
||||
Path docsPath,
|
||||
Path indexPath,
|
||||
Codec codec,
|
||||
int numIndexThreads,
|
||||
VectorEncoding vectorEncoding,
|
||||
int dim,
|
||||
VectorSimilarityFunction similarityFunction,
|
||||
int numDocs
|
||||
) {
|
||||
this.docsPath = docsPath;
|
||||
this.indexPath = indexPath;
|
||||
this.codec = codec;
|
||||
this.numIndexThreads = numIndexThreads;
|
||||
this.vectorEncoding = vectorEncoding;
|
||||
this.dim = dim;
|
||||
this.similarityFunction = similarityFunction;
|
||||
this.numDocs = numDocs;
|
||||
}
|
||||
|
||||
void numSegments(KnnIndexTester.Results result) {
|
||||
try (FSDirectory dir = FSDirectory.open(indexPath); IndexReader reader = DirectoryReader.open(dir)) {
|
||||
result.numSegments = reader.leaves().size();
|
||||
} catch (IOException e) {
|
||||
throw new UncheckedIOException("Failed to get segment count for index at " + indexPath, e);
|
||||
}
|
||||
}
|
||||
|
||||
void createIndex(KnnIndexTester.Results result) throws IOException, InterruptedException, ExecutionException {
|
||||
IndexWriterConfig iwc = new IndexWriterConfig().setOpenMode(IndexWriterConfig.OpenMode.CREATE);
|
||||
iwc.setCodec(codec);
|
||||
iwc.setRAMBufferSizeMB(WRITER_BUFFER_MB);
|
||||
iwc.setUseCompoundFile(false);
|
||||
|
||||
iwc.setMaxFullFlushMergeWaitMillis(0);
|
||||
|
||||
FieldType fieldType = switch (vectorEncoding) {
|
||||
case BYTE -> KnnByteVectorField.createFieldType(dim, similarityFunction);
|
||||
case FLOAT32 -> KnnFloatVectorField.createFieldType(dim, similarityFunction);
|
||||
};
|
||||
iwc.setInfoStream(new PrintStreamInfoStream(System.out) {
|
||||
@Override
|
||||
public boolean isEnabled(String component) {
|
||||
return Objects.equals(component, "IVF");
|
||||
}
|
||||
});
|
||||
logger.debug(
|
||||
"KnnIndexer: using codec=%s, vectorEncoding=%s, dim=%d, similarityFunction=%s",
|
||||
codec.getName(),
|
||||
vectorEncoding,
|
||||
dim,
|
||||
similarityFunction
|
||||
);
|
||||
|
||||
if (Files.exists(indexPath)) {
|
||||
logger.debug("KnnIndexer: existing index at %s", indexPath);
|
||||
} else {
|
||||
Files.createDirectories(indexPath);
|
||||
}
|
||||
|
||||
long start = System.nanoTime();
|
||||
try (
|
||||
FSDirectory dir = FSDirectory.open(indexPath);
|
||||
IndexWriter iw = new IndexWriter(dir, iwc);
|
||||
FileChannel in = FileChannel.open(docsPath)
|
||||
) {
|
||||
long docsPathSizeInBytes = in.size();
|
||||
if (docsPathSizeInBytes % ((long) dim * vectorEncoding.byteSize) != 0) {
|
||||
throw new IllegalArgumentException(
|
||||
"docsPath \"" + docsPath + "\" does not contain a whole number of vectors? size=" + docsPathSizeInBytes
|
||||
);
|
||||
}
|
||||
logger.info(
|
||||
"docsPathSizeInBytes=%d, dim=%d, vectorEncoding=%s, byteSize=%d",
|
||||
docsPathSizeInBytes,
|
||||
dim,
|
||||
vectorEncoding,
|
||||
vectorEncoding.byteSize
|
||||
);
|
||||
|
||||
VectorReader inReader = VectorReader.create(in, dim, vectorEncoding);
|
||||
try (ExecutorService exec = Executors.newFixedThreadPool(numIndexThreads, r -> new Thread(r, "KnnIndexer-Thread"))) {
|
||||
AtomicInteger numDocsIndexed = new AtomicInteger();
|
||||
List<Future<?>> threads = new ArrayList<>();
|
||||
for (int i = 0; i < numIndexThreads; i++) {
|
||||
Thread t = new IndexerThread(iw, inReader, dim, vectorEncoding, fieldType, numDocsIndexed, numDocs);
|
||||
t.setDaemon(true);
|
||||
threads.add(exec.submit(t));
|
||||
}
|
||||
for (Future<?> t : threads) {
|
||||
t.get();
|
||||
}
|
||||
}
|
||||
logger.debug("all indexing threads finished, now IndexWriter.commit()");
|
||||
iw.commit();
|
||||
ConcurrentMergeScheduler cms = (ConcurrentMergeScheduler) iwc.getMergeScheduler();
|
||||
cms.sync();
|
||||
}
|
||||
|
||||
long elapsed = System.nanoTime() - start;
|
||||
logger.debug("Indexing took %d ms for %d docs", TimeUnit.NANOSECONDS.toMillis(elapsed), numDocs);
|
||||
result.indexTimeMS = TimeUnit.NANOSECONDS.toMillis(elapsed);
|
||||
}
|
||||
|
||||
void forceMerge(KnnIndexTester.Results results) throws Exception {
|
||||
IndexWriterConfig iwc = new IndexWriterConfig().setOpenMode(IndexWriterConfig.OpenMode.APPEND);
|
||||
iwc.setInfoStream(new PrintStreamInfoStream(System.out) {
|
||||
@Override
|
||||
public boolean isEnabled(String component) {
|
||||
return Objects.equals(component, "IVF");
|
||||
}
|
||||
});
|
||||
iwc.setCodec(codec);
|
||||
logger.debug("KnnIndexer: forceMerge in %s", indexPath);
|
||||
long startNS = System.nanoTime();
|
||||
try (IndexWriter iw = new IndexWriter(FSDirectory.open(indexPath), iwc)) {
|
||||
iw.forceMerge(1);
|
||||
}
|
||||
long endNS = System.nanoTime();
|
||||
long elapsedNSec = (endNS - startNS);
|
||||
logger.info("forceMerge took %d ms", TimeUnit.NANOSECONDS.toMillis(elapsedNSec));
|
||||
results.forceMergeTimeMS = TimeUnit.NANOSECONDS.toMillis(elapsedNSec);
|
||||
}
|
||||
|
||||
static class IndexerThread extends Thread {
|
||||
private final IndexWriter iw;
|
||||
private final AtomicInteger numDocsIndexed;
|
||||
private final int numDocsToIndex;
|
||||
private final FieldType fieldType;
|
||||
private final VectorEncoding vectorEncoding;
|
||||
private final byte[] byteVectorBuffer;
|
||||
private final float[] floatVectorBuffer;
|
||||
private final VectorReader in;
|
||||
|
||||
private IndexerThread(
|
||||
IndexWriter iw,
|
||||
VectorReader in,
|
||||
int dims,
|
||||
VectorEncoding vectorEncoding,
|
||||
FieldType fieldType,
|
||||
AtomicInteger numDocsIndexed,
|
||||
int numDocsToIndex
|
||||
) {
|
||||
this.iw = iw;
|
||||
this.in = in;
|
||||
this.vectorEncoding = vectorEncoding;
|
||||
this.fieldType = fieldType;
|
||||
this.numDocsIndexed = numDocsIndexed;
|
||||
this.numDocsToIndex = numDocsToIndex;
|
||||
switch (vectorEncoding) {
|
||||
case BYTE -> {
|
||||
byteVectorBuffer = new byte[dims];
|
||||
floatVectorBuffer = null;
|
||||
}
|
||||
case FLOAT32 -> {
|
||||
floatVectorBuffer = new float[dims];
|
||||
byteVectorBuffer = null;
|
||||
}
|
||||
default -> throw new IllegalArgumentException("unexpected vector encoding: " + vectorEncoding);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
_run();
|
||||
} catch (IOException ioe) {
|
||||
throw new UncheckedIOException(ioe);
|
||||
}
|
||||
}
|
||||
|
||||
private void _run() throws IOException {
|
||||
while (true) {
|
||||
int id = numDocsIndexed.getAndIncrement();
|
||||
if (id >= numDocsToIndex) {
|
||||
break;
|
||||
}
|
||||
|
||||
Document doc = new Document();
|
||||
switch (vectorEncoding) {
|
||||
case BYTE -> {
|
||||
in.next(byteVectorBuffer);
|
||||
doc.add(new KnnByteVectorField(VECTOR_FIELD, byteVectorBuffer, fieldType));
|
||||
}
|
||||
case FLOAT32 -> {
|
||||
in.next(floatVectorBuffer);
|
||||
doc.add(new KnnFloatVectorField(VECTOR_FIELD, floatVectorBuffer, fieldType));
|
||||
}
|
||||
}
|
||||
|
||||
if ((id + 1) % 25000 == 0) {
|
||||
logger.debug("Done indexing " + (id + 1) + " documents.");
|
||||
}
|
||||
doc.add(new StoredField(ID_FIELD, id));
|
||||
iw.addDocument(doc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static class VectorReader {
|
||||
final float[] target;
|
||||
final ByteBuffer bytes;
|
||||
final FileChannel input;
|
||||
long position;
|
||||
|
||||
static VectorReader create(FileChannel input, int dim, VectorEncoding vectorEncoding) throws IOException {
|
||||
int bufferSize = dim * vectorEncoding.byteSize;
|
||||
if (input.size() % ((long) dim * vectorEncoding.byteSize) != 0) {
|
||||
throw new IllegalArgumentException(
|
||||
"vectors file \"" + input + "\" does not contain a whole number of vectors? size=" + input.size()
|
||||
);
|
||||
}
|
||||
return new VectorReader(input, dim, bufferSize);
|
||||
}
|
||||
|
||||
VectorReader(FileChannel input, int dim, int bufferSize) throws IOException {
|
||||
this.bytes = ByteBuffer.wrap(new byte[bufferSize]).order(ByteOrder.LITTLE_ENDIAN);
|
||||
this.input = input;
|
||||
this.target = new float[dim];
|
||||
reset();
|
||||
}
|
||||
|
||||
void reset() throws IOException {
|
||||
position = 0;
|
||||
input.position(position);
|
||||
}
|
||||
|
||||
private void readNext() throws IOException {
|
||||
int bytesRead = Channels.readFromFileChannel(this.input, position, bytes);
|
||||
if (bytesRead < bytes.capacity()) {
|
||||
position = 0;
|
||||
bytes.position(0);
|
||||
// wrap around back to the start of the file if we hit the end:
|
||||
logger.warn("VectorReader hit EOF when reading " + this.input + "; now wrapping around to start of file again");
|
||||
this.input.position(position);
|
||||
bytesRead = Channels.readFromFileChannel(this.input, position, bytes);
|
||||
if (bytesRead < bytes.capacity()) {
|
||||
throw new IllegalStateException(
|
||||
"vector file " + input + " doesn't even have enough bytes for a single vector? got bytesRead=" + bytesRead
|
||||
);
|
||||
}
|
||||
}
|
||||
position += bytesRead;
|
||||
bytes.position(0);
|
||||
}
|
||||
|
||||
synchronized void next(float[] dest) throws IOException {
|
||||
readNext();
|
||||
bytes.asFloatBuffer().get(dest);
|
||||
}
|
||||
|
||||
synchronized void next(byte[] dest) throws IOException {
|
||||
readNext();
|
||||
bytes.get(dest);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,488 @@
|
|||
/*
|
||||
* @notice
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
* a copy and modification from Lucene util
|
||||
* Modifications copyright (C) 2025 Elasticsearch B.V.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.test.knn;
|
||||
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.StoredFields;
|
||||
import org.apache.lucene.index.VectorEncoding;
|
||||
import org.apache.lucene.index.VectorSimilarityFunction;
|
||||
import org.apache.lucene.queries.function.FunctionQuery;
|
||||
import org.apache.lucene.queries.function.valuesource.ByteKnnVectorFieldSource;
|
||||
import org.apache.lucene.queries.function.valuesource.ByteVectorSimilarityFunction;
|
||||
import org.apache.lucene.queries.function.valuesource.ConstKnnByteVectorValueSource;
|
||||
import org.apache.lucene.queries.function.valuesource.ConstKnnFloatValueSource;
|
||||
import org.apache.lucene.queries.function.valuesource.FloatKnnVectorFieldSource;
|
||||
import org.apache.lucene.queries.function.valuesource.FloatVectorSimilarityFunction;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.TotalHits;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.FSDirectory;
|
||||
import org.apache.lucene.store.MMapDirectory;
|
||||
import org.elasticsearch.core.PathUtils;
|
||||
import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;
|
||||
import org.elasticsearch.search.profile.query.QueryProfiler;
|
||||
import org.elasticsearch.search.vectors.ESKnnByteVectorQuery;
|
||||
import org.elasticsearch.search.vectors.ESKnnFloatVectorQuery;
|
||||
import org.elasticsearch.search.vectors.IVFKnnFloatVectorQuery;
|
||||
import org.elasticsearch.search.vectors.QueryProfilerProvider;
|
||||
import org.elasticsearch.search.vectors.RescoreKnnVectorQuery;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.IntBuffer;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.attribute.FileTime;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.ForkJoinPool;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
|
||||
import static org.elasticsearch.test.knn.KnnIndexTester.logger;
|
||||
import static org.elasticsearch.test.knn.KnnIndexer.ID_FIELD;
|
||||
import static org.elasticsearch.test.knn.KnnIndexer.VECTOR_FIELD;
|
||||
|
||||
class KnnSearcher {
|
||||
|
||||
private final Path docPath;
|
||||
private final Path indexPath;
|
||||
private final Path queryPath;
|
||||
private final int numDocs;
|
||||
private final int numQueryVectors;
|
||||
private final long randomSeed = 42;
|
||||
private final float selectivity = 1f;
|
||||
private final int topK;
|
||||
private final int efSearch;
|
||||
private final int nProbe;
|
||||
private final KnnIndexTester.IndexType indexType;
|
||||
private final int dim;
|
||||
private final VectorSimilarityFunction similarityFunction;
|
||||
private final VectorEncoding vectorEncoding;
|
||||
private final float overSamplingFactor;
|
||||
private final int searchThreads;
|
||||
|
||||
/**
 * Captures the search-relevant subset of the command line arguments for running
 * queries against the index at {@code indexPath}.
 *
 * @throws IllegalArgumentException if the configured number of query vectors is not positive
 */
KnnSearcher(Path indexPath, CmdLineArgs cmdLineArgs) {
    if (cmdLineArgs.numQueries() <= 0) {
        throw new IllegalArgumentException("numQueryVectors must be > 0");
    }
    this.indexPath = indexPath;
    this.docPath = cmdLineArgs.docVectors();
    this.queryPath = cmdLineArgs.queryVectors();
    this.numDocs = cmdLineArgs.numDocs();
    this.numQueryVectors = cmdLineArgs.numQueries();
    this.topK = cmdLineArgs.k();
    this.efSearch = cmdLineArgs.numCandidates();
    this.nProbe = cmdLineArgs.nProbe();
    this.indexType = cmdLineArgs.indexType();
    this.dim = cmdLineArgs.dimensions();
    this.similarityFunction = cmdLineArgs.vectorSpace();
    this.vectorEncoding = cmdLineArgs.vectorEncoding();
    this.overSamplingFactor = cmdLineArgs.overSamplingFactor();
    this.searchThreads = cmdLineArgs.searchThreads();
}
|
||||
|
||||
void runSearch(KnnIndexTester.Results finalResults) throws IOException {
|
||||
TopDocs[] results = new TopDocs[numQueryVectors];
|
||||
int[][] resultIds = new int[numQueryVectors][];
|
||||
long elapsed, totalCpuTimeMS, totalVisited = 0;
|
||||
try (
|
||||
FileChannel input = FileChannel.open(queryPath);
|
||||
ExecutorService executorService = Executors.newFixedThreadPool(searchThreads, r -> new Thread(r, "KnnSearcher-Thread"))
|
||||
) {
|
||||
long queryPathSizeInBytes = input.size();
|
||||
logger.info(
|
||||
"queryPath size: "
|
||||
+ queryPathSizeInBytes
|
||||
+ " bytes, assuming vector count is "
|
||||
+ (queryPathSizeInBytes / ((long) dim * vectorEncoding.byteSize))
|
||||
);
|
||||
KnnIndexer.VectorReader targetReader = KnnIndexer.VectorReader.create(input, dim, vectorEncoding);
|
||||
long startNS;
|
||||
try (MMapDirectory dir = new MMapDirectory(indexPath)) {
|
||||
try (DirectoryReader reader = DirectoryReader.open(dir)) {
|
||||
IndexSearcher searcher = searchThreads > 1 ? new IndexSearcher(reader, executorService) : new IndexSearcher(reader);
|
||||
byte[] targetBytes = new byte[dim];
|
||||
float[] target = new float[dim];
|
||||
// warm up
|
||||
for (int i = 0; i < numQueryVectors; i++) {
|
||||
if (vectorEncoding.equals(VectorEncoding.BYTE)) {
|
||||
targetReader.next(targetBytes);
|
||||
doVectorQuery(targetBytes, searcher);
|
||||
} else {
|
||||
targetReader.next(target);
|
||||
doVectorQuery(target, searcher);
|
||||
}
|
||||
}
|
||||
targetReader.reset();
|
||||
startNS = System.nanoTime();
|
||||
KnnIndexTester.ThreadDetails startThreadDetails = new KnnIndexTester.ThreadDetails();
|
||||
for (int i = 0; i < numQueryVectors; i++) {
|
||||
if (vectorEncoding.equals(VectorEncoding.BYTE)) {
|
||||
targetReader.next(targetBytes);
|
||||
results[i] = doVectorQuery(targetBytes, searcher);
|
||||
} else {
|
||||
targetReader.next(target);
|
||||
results[i] = doVectorQuery(target, searcher);
|
||||
}
|
||||
}
|
||||
KnnIndexTester.ThreadDetails endThreadDetails = new KnnIndexTester.ThreadDetails();
|
||||
elapsed = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNS);
|
||||
long startCPUTimeNS = 0;
|
||||
long endCPUTimeNS = 0;
|
||||
for (int i = 0; i < startThreadDetails.threadInfos.length; i++) {
|
||||
if (startThreadDetails.threadInfos[i].getThreadName().startsWith("KnnSearcher-Thread")) {
|
||||
startCPUTimeNS += startThreadDetails.cpuTimesNS[i];
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < endThreadDetails.threadInfos.length; i++) {
|
||||
if (endThreadDetails.threadInfos[i].getThreadName().startsWith("KnnSearcher-Thread")) {
|
||||
endCPUTimeNS += endThreadDetails.cpuTimesNS[i];
|
||||
}
|
||||
}
|
||||
totalCpuTimeMS = TimeUnit.NANOSECONDS.toMillis(endCPUTimeNS - startCPUTimeNS);
|
||||
|
||||
// Fetch, validate and write result document ids.
|
||||
StoredFields storedFields = reader.storedFields();
|
||||
for (int i = 0; i < numQueryVectors; i++) {
|
||||
totalVisited += results[i].totalHits.value();
|
||||
resultIds[i] = getResultIds(results[i], storedFields);
|
||||
}
|
||||
logger.info(
|
||||
"completed %d searches in %d ms: %d QPS CPU time=%dms",
|
||||
numQueryVectors,
|
||||
elapsed,
|
||||
(1000L * numQueryVectors) / elapsed,
|
||||
totalCpuTimeMS
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
logger.info("checking results");
|
||||
int[][] nn = getOrCalculateExactNN();
|
||||
finalResults.avgRecall = checkResults(resultIds, nn, topK);
|
||||
finalResults.qps = (1000f * numQueryVectors) / elapsed;
|
||||
finalResults.avgLatency = (float) elapsed / numQueryVectors;
|
||||
finalResults.averageVisited = (double) totalVisited / numQueryVectors;
|
||||
finalResults.netCpuTimeMS = (double) totalCpuTimeMS / numQueryVectors;
|
||||
finalResults.avgCpuCount = (double) totalCpuTimeMS / elapsed;
|
||||
}
|
||||
|
||||
private int[][] getOrCalculateExactNN() throws IOException {
|
||||
// look in working directory for cached nn file
|
||||
String hash = Integer.toString(
|
||||
Objects.hash(
|
||||
docPath,
|
||||
indexPath,
|
||||
queryPath,
|
||||
numDocs,
|
||||
numQueryVectors,
|
||||
topK,
|
||||
similarityFunction.ordinal(),
|
||||
selectivity,
|
||||
randomSeed
|
||||
),
|
||||
36
|
||||
);
|
||||
String nnFileName = "nn-" + hash + ".bin";
|
||||
Path nnPath = PathUtils.get("target/" + nnFileName);
|
||||
if (Files.exists(nnPath) && isNewer(nnPath, docPath, indexPath, queryPath)) {
|
||||
logger.info("read pre-cached exact match vectors from cache file \"" + nnPath + "\"");
|
||||
return readExactNN(nnPath);
|
||||
} else {
|
||||
logger.info("computing brute-force exact KNN matches for " + numQueryVectors + " query vectors from \"" + queryPath + "\"");
|
||||
long startNS = System.nanoTime();
|
||||
// TODO: enable computing NN from high precision vectors when
|
||||
// checking low-precision recall
|
||||
int[][] nn;
|
||||
if (vectorEncoding.equals(VectorEncoding.BYTE)) {
|
||||
nn = computeExactNNByte(queryPath);
|
||||
} else {
|
||||
nn = computeExactNN(queryPath);
|
||||
}
|
||||
writeExactNN(nn, nnPath);
|
||||
long elapsedMS = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNS); // ns -> ms
|
||||
logger.info("computed " + numQueryVectors + " exact matches in " + elapsedMS + " ms");
|
||||
return nn;
|
||||
}
|
||||
}
|
||||
|
||||
private boolean isNewer(Path path, Path... others) throws IOException {
|
||||
FileTime modified = Files.getLastModifiedTime(path);
|
||||
for (Path other : others) {
|
||||
if (Files.getLastModifiedTime(other).compareTo(modified) >= 0) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
TopDocs doVectorQuery(byte[] vector, IndexSearcher searcher) throws IOException {
|
||||
Query knnQuery;
|
||||
if (overSamplingFactor > 1f) {
|
||||
throw new IllegalArgumentException("oversampling factor > 1 is not supported for byte vectors");
|
||||
}
|
||||
if (indexType == KnnIndexTester.IndexType.IVF) {
|
||||
throw new IllegalArgumentException("IVF index type does not support byte vectors");
|
||||
} else {
|
||||
knnQuery = new ESKnnByteVectorQuery(
|
||||
VECTOR_FIELD,
|
||||
vector,
|
||||
topK,
|
||||
efSearch,
|
||||
null,
|
||||
DenseVectorFieldMapper.FilterHeuristic.ACORN.getKnnSearchStrategy()
|
||||
);
|
||||
}
|
||||
QueryProfiler profiler = new QueryProfiler();
|
||||
TopDocs docs = searcher.search(knnQuery, this.topK);
|
||||
QueryProfilerProvider queryProfilerProvider = (QueryProfilerProvider) knnQuery;
|
||||
queryProfilerProvider.profile(profiler);
|
||||
return new TopDocs(new TotalHits(profiler.getVectorOpsCount(), docs.totalHits.relation()), docs.scoreDocs);
|
||||
}
|
||||
|
||||
TopDocs doVectorQuery(float[] vector, IndexSearcher searcher) throws IOException {
|
||||
Query knnQuery;
|
||||
int topK = this.topK;
|
||||
int efSearch = this.efSearch;
|
||||
if (overSamplingFactor > 1f) {
|
||||
// oversample the topK results to get more candidates for the final result
|
||||
topK = (int) Math.ceil(topK * overSamplingFactor);
|
||||
efSearch = Math.max(topK, efSearch);
|
||||
}
|
||||
if (indexType == KnnIndexTester.IndexType.IVF) {
|
||||
knnQuery = new IVFKnnFloatVectorQuery(VECTOR_FIELD, vector, topK, efSearch, null, nProbe);
|
||||
} else {
|
||||
knnQuery = new ESKnnFloatVectorQuery(
|
||||
VECTOR_FIELD,
|
||||
vector,
|
||||
topK,
|
||||
efSearch,
|
||||
null,
|
||||
DenseVectorFieldMapper.FilterHeuristic.ACORN.getKnnSearchStrategy()
|
||||
);
|
||||
}
|
||||
if (overSamplingFactor > 1f) {
|
||||
// oversample the topK results to get more candidates for the final result
|
||||
knnQuery = new RescoreKnnVectorQuery(VECTOR_FIELD, vector, similarityFunction, this.topK, knnQuery);
|
||||
}
|
||||
QueryProfiler profiler = new QueryProfiler();
|
||||
TopDocs docs = searcher.search(knnQuery, this.topK);
|
||||
QueryProfilerProvider queryProfilerProvider = (QueryProfilerProvider) knnQuery;
|
||||
queryProfilerProvider.profile(profiler);
|
||||
return new TopDocs(new TotalHits(profiler.getVectorOpsCount(), docs.totalHits.relation()), docs.scoreDocs);
|
||||
}
|
||||
|
||||
private static float checkResults(int[][] results, int[][] nn, int topK) {
|
||||
int totalMatches = 0;
|
||||
int totalResults = results.length * topK;
|
||||
for (int i = 0; i < results.length; i++) {
|
||||
totalMatches += compareNN(nn[i], results[i], topK);
|
||||
}
|
||||
return totalMatches / (float) totalResults;
|
||||
}
|
||||
|
||||
private static int compareNN(int[] expected, int[] results, int topK) {
|
||||
int matched = 0;
|
||||
Set<Integer> expectedSet = new HashSet<>();
|
||||
Set<Integer> alreadySeen = new HashSet<>();
|
||||
for (int i = 0; i < topK; i++) {
|
||||
expectedSet.add(expected[i]);
|
||||
}
|
||||
for (int docId : results) {
|
||||
if (alreadySeen.add(docId) == false) {
|
||||
throw new IllegalStateException("duplicate docId=" + docId);
|
||||
}
|
||||
if (expectedSet.contains(docId)) {
|
||||
++matched;
|
||||
}
|
||||
}
|
||||
return matched;
|
||||
}
|
||||
|
||||
private int[][] readExactNN(Path nnPath) throws IOException {
|
||||
int[][] result = new int[numQueryVectors][];
|
||||
try (FileChannel in = FileChannel.open(nnPath)) {
|
||||
IntBuffer intBuffer = in.map(FileChannel.MapMode.READ_ONLY, 0, (long) numQueryVectors * topK * Integer.BYTES)
|
||||
.order(ByteOrder.LITTLE_ENDIAN)
|
||||
.asIntBuffer();
|
||||
for (int i = 0; i < numQueryVectors; i++) {
|
||||
result[i] = new int[topK];
|
||||
intBuffer.get(result[i]);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private void writeExactNN(int[][] nn, Path nnPath) throws IOException {
|
||||
logger.info("writing true nearest neighbors to cache file \"" + nnPath + "\"");
|
||||
ByteBuffer tmp = ByteBuffer.allocate(nn[0].length * Integer.BYTES).order(ByteOrder.LITTLE_ENDIAN);
|
||||
try (OutputStream out = Files.newOutputStream(nnPath)) {
|
||||
for (int i = 0; i < numQueryVectors; i++) {
|
||||
tmp.asIntBuffer().put(nn[i]);
|
||||
out.write(tmp.array());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private int[][] computeExactNN(Path queryPath) throws IOException {
|
||||
int[][] result = new int[numQueryVectors][];
|
||||
try (Directory dir = FSDirectory.open(indexPath); DirectoryReader reader = DirectoryReader.open(dir)) {
|
||||
List<Callable<Void>> tasks = new ArrayList<>();
|
||||
try (FileChannel qIn = FileChannel.open(queryPath)) {
|
||||
KnnIndexer.VectorReader queryReader = KnnIndexer.VectorReader.create(qIn, dim, VectorEncoding.FLOAT32);
|
||||
for (int i = 0; i < numQueryVectors; i++) {
|
||||
float[] queryVector = new float[dim];
|
||||
queryReader.next(queryVector);
|
||||
tasks.add(new ComputeNNFloatTask(i, topK, queryVector, result, reader, similarityFunction));
|
||||
}
|
||||
ForkJoinPool.commonPool().invokeAll(tasks);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
private int[][] computeExactNNByte(Path queryPath) throws IOException {
|
||||
int[][] result = new int[numQueryVectors][];
|
||||
try (Directory dir = FSDirectory.open(indexPath); DirectoryReader reader = DirectoryReader.open(dir)) {
|
||||
List<Callable<Void>> tasks = new ArrayList<>();
|
||||
try (FileChannel qIn = FileChannel.open(queryPath)) {
|
||||
KnnIndexer.VectorReader queryReader = KnnIndexer.VectorReader.create(qIn, dim, VectorEncoding.BYTE);
|
||||
for (int i = 0; i < numQueryVectors; i++) {
|
||||
byte[] queryVector = new byte[dim];
|
||||
queryReader.next(queryVector);
|
||||
tasks.add(new ComputeNNByteTask(i, queryVector, result, reader, similarityFunction));
|
||||
}
|
||||
ForkJoinPool.commonPool().invokeAll(tasks);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
static class ComputeNNFloatTask implements Callable<Void> {
|
||||
|
||||
private final int queryOrd;
|
||||
private final float[] query;
|
||||
private final int[][] result;
|
||||
private final IndexReader reader;
|
||||
private final VectorSimilarityFunction similarityFunction;
|
||||
private final int topK;
|
||||
|
||||
ComputeNNFloatTask(
|
||||
int queryOrd,
|
||||
int topK,
|
||||
float[] query,
|
||||
int[][] result,
|
||||
IndexReader reader,
|
||||
VectorSimilarityFunction similarityFunction
|
||||
) {
|
||||
this.queryOrd = queryOrd;
|
||||
this.query = query;
|
||||
this.result = result;
|
||||
this.reader = reader;
|
||||
this.similarityFunction = similarityFunction;
|
||||
this.topK = topK;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Void call() {
|
||||
IndexSearcher searcher = new IndexSearcher(reader);
|
||||
try {
|
||||
var queryVector = new ConstKnnFloatValueSource(query);
|
||||
var docVectors = new FloatKnnVectorFieldSource(VECTOR_FIELD);
|
||||
Query query = new FunctionQuery(new FloatVectorSimilarityFunction(similarityFunction, queryVector, docVectors));
|
||||
var topDocs = searcher.search(query, topK);
|
||||
result[queryOrd] = getResultIds(topDocs, reader.storedFields());
|
||||
if ((queryOrd + 1) % 10 == 0) {
|
||||
logger.info(" exact knn scored " + (queryOrd + 1));
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Scores one byte query vector against every document's vector with a function
 * query (exact, non-approximate) and stores the resulting doc ids into
 * {@code result[queryOrd]}. Each task writes only its own row.
 */
static class ComputeNNByteTask implements Callable<Void> {

    private final int queryOrd;
    private final byte[] query;
    private final int[][] result;
    private final IndexReader reader;
    private final VectorSimilarityFunction similarityFunction;

    ComputeNNByteTask(int queryOrd, byte[] query, int[][] result, IndexReader reader, VectorSimilarityFunction similarityFunction) {
        this.queryOrd = queryOrd;
        this.query = query;
        this.result = result;
        this.reader = reader;
        this.similarityFunction = similarityFunction;
    }

    @Override
    public Void call() {
        IndexSearcher searcher = new IndexSearcher(reader);
        // NOTE(review): topK is read from the length of the first result row, so the
        // caller must pre-allocate the rows (or at least row 0) before running these
        // tasks — with a null row this line throws NPE. The float task takes topK as a
        // constructor argument instead; confirm callers size the rows, or align the two.
        int topK = result[0].length;
        try {
            var queryVector = new ConstKnnByteVectorValueSource(query);
            var docVectors = new ByteKnnVectorFieldSource(VECTOR_FIELD);
            // Local 'query' (a Query) intentionally shadows the byte[] field here.
            Query query = new FunctionQuery(new ByteVectorSimilarityFunction(similarityFunction, queryVector, docVectors));
            var topDocs = searcher.search(query, topK);
            result[queryOrd] = getResultIds(topDocs, reader.storedFields());
            // Coarse progress logging: one line per ten queries scored.
            if ((queryOrd + 1) % 10 == 0) {
                logger.info(" exact knn scored " + (queryOrd + 1));
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        return null;
    }
}
|
||||
|
||||
static int[] getResultIds(TopDocs topDocs, StoredFields storedFields) throws IOException {
|
||||
int[] resultIds = new int[topDocs.scoreDocs.length];
|
||||
int i = 0;
|
||||
for (ScoreDoc doc : topDocs.scoreDocs) {
|
||||
if (doc.doc != NO_MORE_DOCS) {
|
||||
// there is a bug somewhere that can result in doc=NO_MORE_DOCS! I think it happens
|
||||
// in some degenerate case (like input query has NaN in it?) that causes no results to
|
||||
// be returned from HNSW search?
|
||||
resultIds[i++] = Integer.parseInt(storedFields.document(doc.doc).get(ID_FIELD));
|
||||
}
|
||||
}
|
||||
return resultIds;
|
||||
}
|
||||
|
||||
}
|
|
@ -479,4 +479,6 @@ module org.elasticsearch.server {
|
|||
exports org.elasticsearch.lucene.util.automaton;
|
||||
exports org.elasticsearch.index.codec.perfield;
|
||||
exports org.elasticsearch.lucene.search;
|
||||
exports org.elasticsearch.index.codec.vectors to org.elasticsearch.test.knn;
|
||||
exports org.elasticsearch.index.codec.vectors.es818 to org.elasticsearch.test.knn;
|
||||
}
|
||||
|
|
|
@ -51,6 +51,7 @@ public abstract class IVFVectorsReader extends KnnVectorsReader {
|
|||
protected final IntObjectHashMap<FieldEntry> fields;
|
||||
private final FlatVectorsReader rawVectorsReader;
|
||||
|
||||
@SuppressWarnings("this-escape")
|
||||
protected IVFVectorsReader(SegmentReadState state, FlatVectorsReader rawVectorsReader) throws IOException {
|
||||
this.state = state;
|
||||
this.fieldInfos = state.fieldInfos;
|
||||
|
|
|
@ -55,6 +55,7 @@ public abstract class IVFVectorsWriter extends KnnVectorsWriter {
|
|||
private final FlatVectorsWriter rawVectorDelegate;
|
||||
private final SegmentWriteState segmentWriteState;
|
||||
|
||||
@SuppressWarnings("this-escape")
|
||||
protected IVFVectorsWriter(SegmentWriteState state, FlatVectorsWriter rawVectorDelegate) throws IOException {
|
||||
this.segmentWriteState = state;
|
||||
this.rawVectorDelegate = rawVectorDelegate;
|
||||
|
|
|
@ -69,6 +69,7 @@ public class ES816BinaryQuantizedVectorsReader extends FlatVectorsReader impleme
|
|||
private final FlatVectorsReader rawVectorsReader;
|
||||
private final ES816BinaryFlatVectorsScorer vectorScorer;
|
||||
|
||||
@SuppressWarnings("this-escape")
|
||||
ES816BinaryQuantizedVectorsReader(
|
||||
SegmentReadState state,
|
||||
FlatVectorsReader rawVectorsReader,
|
||||
|
|
|
@ -65,6 +65,7 @@ public class DirectIOLucene99FlatVectorsReader extends FlatVectorsReader impleme
|
|||
private final IndexInput vectorData;
|
||||
private final FieldInfos fieldInfos;
|
||||
|
||||
@SuppressWarnings("this-escape")
|
||||
public DirectIOLucene99FlatVectorsReader(SegmentReadState state, FlatVectorsScorer scorer) throws IOException {
|
||||
super(scorer);
|
||||
int versionMeta = readMetadata(state);
|
||||
|
|
|
@ -70,6 +70,7 @@ public class ES818BinaryQuantizedVectorsReader extends FlatVectorsReader impleme
|
|||
private final FlatVectorsReader rawVectorsReader;
|
||||
private final ES818BinaryFlatVectorsScorer vectorScorer;
|
||||
|
||||
@SuppressWarnings("this-escape")
|
||||
ES818BinaryQuantizedVectorsReader(
|
||||
SegmentReadState state,
|
||||
FlatVectorsReader rawVectorsReader,
|
||||
|
|
|
@ -84,6 +84,7 @@ public class ES818BinaryQuantizedVectorsWriter extends FlatVectorsWriter {
|
|||
*
|
||||
* @param vectorsScorer the scorer to use for scoring vectors
|
||||
*/
|
||||
@SuppressWarnings("this-escape")
|
||||
protected ES818BinaryQuantizedVectorsWriter(
|
||||
ES818BinaryFlatVectorsScorer vectorsScorer,
|
||||
FlatVectorsWriter rawVectorDelegate,
|
||||
|
|
|
@ -171,3 +171,5 @@ if (extraProjects.exists()) {
|
|||
addSubProjects('', extraProjectDir)
|
||||
}
|
||||
}
|
||||
|
||||
include 'qa:vector'
|
|
@ -8,11 +8,11 @@
|
|||
*/
|
||||
|
||||
subprojects {
|
||||
apply plugin: 'elasticsearch.base-internal-es-plugin'
|
||||
apply plugin: 'elasticsearch.base-internal-es-plugin'
|
||||
|
||||
esplugin {
|
||||
name = it.name
|
||||
licenseFile = layout.settingsDirectory.file('licenses/AGPL-3.0+SSPL-1.0+ELASTIC-LICENSE-2.0.txt').asFile
|
||||
noticeFile = layout.settingsDirectory.file('NOTICE.txt').asFile
|
||||
}
|
||||
esplugin {
|
||||
name = it.name
|
||||
licenseFile = layout.settingsDirectory.file('licenses/AGPL-3.0+SSPL-1.0+ELASTIC-LICENSE-2.0.txt').asFile
|
||||
noticeFile = layout.settingsDirectory.file('NOTICE.txt').asFile
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue