mirror of
https://github.com/elastic/elasticsearch.git
synced 2025-06-28 17:34:17 -04:00
Vector test tools (#128934)
This adds some testing tools for verifying vector recall and latency directly, without having to spin up an entire ES node and run a Rally track. It's pretty barebones and takes inspiration from Lucene-util, but I wanted access to our own formats and tooling to make our lives easier. Here is an example config file. This will build the initial index, run queries at num_candidates: 50, then again at num_candidates: 100 (without reindexing, and re-using the cached nearest neighbors). ``` [{ "doc_vectors" : "path", "query_vectors" : "path", "num_docs" : 10000, "num_queries" : 10, "index_type" : "hnsw", "num_candidates" : 50, "k" : 10, "hnsw_m" : 16, "hnsw_ef_construction" : 200, "index_threads" : 4, "reindex" : true, "force_merge" : false, "vector_space" : "maximum_inner_product", "dimensions" : 768 }, { "doc_vectors" : "path", "query_vectors" : "path", "num_docs" : 10000, "num_queries" : 10, "index_type" : "hnsw", "num_candidates" : 100, "k" : 10, "hnsw_m" : 16, "hnsw_ef_construction" : 200, "vector_space" : "maximum_inner_product", "dimensions" : 768 } ] ``` To execute: ``` ./gradlew :qa:vector:checkVec --args="/Path/to/knn_tester_config.json" ``` Calling `./gradlew :qa:vector:checkVecHelp` gives some guidance on how to use it. It also provides a way to run it via java directly (useful to bypass gradlew guff).
This commit is contained in:
parent
ffa8927a9f
commit
155c0da00a
17 changed files with 2312 additions and 6 deletions
|
@@ -479,4 +479,6 @@ module org.elasticsearch.server {
|
|||
exports org.elasticsearch.lucene.util.automaton;
|
||||
exports org.elasticsearch.index.codec.perfield;
|
||||
exports org.elasticsearch.lucene.search;
|
||||
exports org.elasticsearch.index.codec.vectors to org.elasticsearch.test.knn;
|
||||
exports org.elasticsearch.index.codec.vectors.es818 to org.elasticsearch.test.knn;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue