mirror of
https://github.com/elastic/elasticsearch.git
synced 2025-06-28 01:22:26 -04:00
Add retrievers using the parser-only approach (#105470)
This enhancement adds a new abstraction to the _search API called "retriever." A retriever is something that returns top hits. This adds three initial retrievers called "standard", "knn", and "rrf". The retrievers use a parser-only approach where they are parsed and then translated into a SearchSourceBuilder to execute the actual search. --------- Co-authored-by: Mayya Sharipova <mayya.sharipova@elastic.co>
This commit is contained in:
parent
bfbb155985
commit
68b0acac8f
41 changed files with 3608 additions and 210 deletions
5
docs/changelog/105470.yaml
Normal file
5
docs/changelog/105470.yaml
Normal file
|
@ -0,0 +1,5 @@
|
|||
pr: 105470
|
||||
summary: Add retrievers using the parser-only approach
|
||||
area: Ranking
|
||||
type: enhancement
|
||||
issues: []
|
|
@ -155,47 +155,55 @@ GET my-index/_search
|
|||
----
|
||||
// TEST[skip: TBD]
|
||||
|
||||
This can also be achieved by using sub searches combined with <<rrf>>.
|
||||
This can also be achieved using <<rrf, reciprocal rank fusion (RRF)>>,
|
||||
through an <<rrf-retriever, `rrf` retriever>> with multiple
|
||||
<<standard-retriever, `standard` retrievers>>.
|
||||
|
||||
[source,console]
|
||||
----
|
||||
GET my-index/_search
|
||||
{
|
||||
"sub_searches": [
|
||||
{
|
||||
"query": {
|
||||
"multi_match": {
|
||||
"query": "How is the weather in Jamaica?",
|
||||
"fields": [
|
||||
"title",
|
||||
"description"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"query": {
|
||||
"text_expansion": {
|
||||
"ml.inference.title_expanded.predicted_value": {
|
||||
"model_id": ".elser_model_2",
|
||||
"model_text": "How is the weather in Jamaica?"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"query": {
|
||||
"text_expansion": {
|
||||
"ml.inference.description_expanded.predicted_value": {
|
||||
"model_id": ".elser_model_2",
|
||||
"model_text": "How is the weather in Jamaica?"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"rank": {
|
||||
"retriever": {
|
||||
"rrf": {
|
||||
"retrievers": [
|
||||
{
|
||||
"standard": {
|
||||
"query": {
|
||||
"multi_match": {
|
||||
"query": "How is the weather in Jamaica?",
|
||||
"fields": [
|
||||
"title",
|
||||
"description"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"standard": {
|
||||
"query": {
|
||||
"text_expansion": {
|
||||
"ml.inference.title_expanded.predicted_value": {
|
||||
"model_id": ".elser_model_2",
|
||||
"model_text": "How is the weather in Jamaica?"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"standard": {
|
||||
"query": {
|
||||
"text_expansion": {
|
||||
"ml.inference.description_expanded.predicted_value": {
|
||||
"model_id": ".elser_model_2",
|
||||
"model_text": "How is the weather in Jamaica?"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"window_size": 10,
|
||||
"rank_constant": 20
|
||||
}
|
||||
|
|
|
@ -1281,3 +1281,34 @@ Default: 1, the primary shard.
|
|||
See <<index-wait-for-active-shards>>.
|
||||
--
|
||||
end::wait_for_active_shards[]
|
||||
|
||||
tag::rrf-retrievers[]
|
||||
`retrievers`::
|
||||
(Required, array of retriever objects)
|
||||
+
|
||||
A list of child retrievers to specify which sets of returned top documents
|
||||
will have the RRF formula applied to them. Each child retriever carries an
|
||||
equal weight as part of the RRF formula. Two or more child retrievers are
|
||||
required.
|
||||
end::rrf-retrievers[]
|
||||
|
||||
tag::rrf-rank-constant[]
|
||||
`rank_constant`::
|
||||
(Optional, integer)
|
||||
+
|
||||
This value determines how much influence documents in individual
|
||||
result sets per query have over the final ranked result set. A higher value indicates
|
||||
that lower ranked documents have more influence. This value must be greater than or
|
||||
equal to `1`. Defaults to `60`.
|
||||
end::rrf-rank-constant[]
|
||||
|
||||
tag::rrf-window-size[]
|
||||
`window_size`::
|
||||
(Optional, integer)
|
||||
+
|
||||
This value determines the size of the individual result sets per
|
||||
query. A higher value will improve result relevance at the cost of performance. The final
|
||||
ranked result set is pruned down to the search request's <<search-size-param, size>>.
|
||||
`window_size` must be greater than or equal to `size` and greater than or equal to `1`.
|
||||
Defaults to the `size` parameter.
|
||||
end::rrf-window-size[]
|
||||
|
|
|
@ -52,6 +52,8 @@ include::search/point-in-time-api.asciidoc[]
|
|||
|
||||
include::search/knn-search.asciidoc[]
|
||||
|
||||
include::search/retriever.asciidoc[]
|
||||
|
||||
include::search/rrf.asciidoc[]
|
||||
|
||||
include::search/scroll-api.asciidoc[]
|
||||
|
|
225
docs/reference/search/retriever.asciidoc
Normal file
225
docs/reference/search/retriever.asciidoc
Normal file
|
@ -0,0 +1,225 @@
|
|||
[[retriever]]
|
||||
=== Retriever API
|
||||
|
||||
preview::["This functionality is in technical preview and may be changed or removed in a future release. The syntax will likely change before GA. Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features."]
|
||||
|
||||
A retriever is a specification to describe top documents returned from a
|
||||
search. A retriever replaces other elements of the <<search-search, search API>>
|
||||
that also return top documents such as <<query-dsl, `query`>> and
|
||||
<<search-api-knn, `knn`>>. A retriever may have child retrievers where a
|
||||
retriever with two or more children is considered a compound retriever. This
|
||||
allows for complex behavior to be depicted in a tree-like structure, called
|
||||
the retriever tree, to better clarify the order of operations that occur
|
||||
during a search.
|
||||
|
||||
The following retrievers are available:
|
||||
|
||||
`standard`::
|
||||
A <<standard-retriever, retriever>> that replaces the functionality of a traditional <<query-dsl, query>>.
|
||||
|
||||
`knn`::
|
||||
A <<knn-retriever, retriever>> that replaces the functionality of a <<search-api-knn, knn search>>.
|
||||
|
||||
`rrf`::
|
||||
A <<rrf-retriever, retriever>> that produces top documents from <<rrf, reciprocal rank fusion (RRF)>>.
|
||||
|
||||
[[standard-retriever]]
|
||||
==== Standard Retriever
|
||||
|
||||
A standard retriever returns top documents from a traditional <<query-dsl, query>>.
|
||||
|
||||
===== Parameters:
|
||||
|
||||
`query`::
|
||||
(Optional, <<query-dsl, query object>>)
|
||||
+
|
||||
Defines a query to retrieve a set of top documents.
|
||||
|
||||
`filter`::
|
||||
(Optional, <<query-dsl, query object or list of query objects>>)
|
||||
+
|
||||
Applies a <<query-dsl-bool-query, boolean query filter>> to this retriever
|
||||
where all documents must match this query but do not contribute to the score.
|
||||
|
||||
`search_after`::
|
||||
(Optional, <<search-after, search after object>>)
|
||||
+
|
||||
Defines a search after object parameter used for pagination.
|
||||
|
||||
include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=terminate_after]
|
||||
|
||||
`sort`::
|
||||
+
|
||||
(Optional, <<sort-search-results, sort object>>)
|
||||
A sort object that specifies the order of matching documents.
|
||||
|
||||
`min_score`::
|
||||
(Optional, `float`)
|
||||
+
|
||||
Minimum <<relevance-scores, `_score`>> for matching documents. Documents with a
|
||||
lower `_score` are not included in the top documents.
|
||||
|
||||
`collapse`::
|
||||
(Optional, <<collapse-search-results, collapse object>>)
|
||||
+
|
||||
Collapses the top documents by a specified key into a single top document per key.
|
||||
|
||||
===== Restrictions
|
||||
|
||||
When a retriever tree contains a compound retriever (a retriever with two or more child
|
||||
retrievers) *only* the query element is allowed.
|
||||
|
||||
===== Example
|
||||
|
||||
[source,js]
|
||||
----
|
||||
GET /index/_search
|
||||
{
|
||||
"retriever": {
|
||||
"standard": {
|
||||
"query" { ... },
|
||||
"filter" { ... },
|
||||
"min_score": ...
|
||||
}
|
||||
},
|
||||
"size": ...
|
||||
}
|
||||
----
|
||||
// NOTCONSOLE
|
||||
|
||||
[[knn-retriever]]
|
||||
==== kNN Retriever
|
||||
|
||||
A kNN retriever returns top documents from a <<knn-search, k-nearest neighbor search (kNN)>>.
|
||||
|
||||
===== Parameters
|
||||
|
||||
`field`::
|
||||
(Required, string)
|
||||
+
|
||||
include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=knn-field]
|
||||
|
||||
`query_vector`::
|
||||
(Required if `query_vector_builder` is not defined, array of `float`)
|
||||
+
|
||||
include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=knn-query-vector]
|
||||
|
||||
`query_vector_builder`::
|
||||
(Required if `query_vector` is not defined, query vector builder object)
|
||||
+
|
||||
Defines a <<knn-semantic-search, model>> to build a query vector.
|
||||
|
||||
`k`::
|
||||
(Required, integer)
|
||||
+
|
||||
Number of nearest neighbors to return as top hits. This value must be fewer than
|
||||
or equal to `num_candidates`.
|
||||
|
||||
`num_candidates`::
|
||||
(Required, integer)
|
||||
+
|
||||
include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=knn-num-candidates]
|
||||
|
||||
`filter`::
|
||||
(Optional, <<query-dsl, query object or list of query objects>>)
|
||||
+
|
||||
include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=knn-filter]
|
||||
|
||||
`similarity`::
|
||||
(Optional, float)
|
||||
+
|
||||
include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=knn-similarity]
|
||||
|
||||
===== Restrictions
|
||||
|
||||
The parameters `query_vector` and `query_vector_builder` cannot be used together.
|
||||
|
||||
===== Example:
|
||||
|
||||
[source,js]
|
||||
----
|
||||
GET /index/_search
|
||||
{
|
||||
"retriever": {
|
||||
"knn": {
|
||||
"field": ...,
|
||||
"query_vector": ...,
|
||||
"k": ...,
|
||||
"num_candidates": ...
|
||||
}
|
||||
}
|
||||
}
|
||||
----
|
||||
// NOTCONSOLE
|
||||
|
||||
[[rrf-retriever]]
|
||||
==== RRF Retriever
|
||||
|
||||
An <<rrf, RRF>> retriever returns top documents based on the RRF formula
|
||||
equally weighting two or more child retrievers.
|
||||
|
||||
===== Parameters
|
||||
|
||||
include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=rrf-retrievers]
|
||||
|
||||
include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=rrf-rank-constant]
|
||||
|
||||
include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=rrf-window-size]
|
||||
|
||||
===== Restrictions
|
||||
|
||||
An RRF retriever is a compound retriever. Child retrievers may not use
|
||||
elements that are restricted by having a compound retriever as part of
|
||||
the retriever tree.
|
||||
|
||||
===== Example
|
||||
|
||||
[source,js]
|
||||
----
|
||||
GET /index/_search
|
||||
{
|
||||
"retriever": {
|
||||
"rrf": {
|
||||
"retrievers": [
|
||||
{
|
||||
"standard" { ... }
|
||||
},
|
||||
{
|
||||
"knn": { ... }
|
||||
}
|
||||
],
|
||||
"rank_constant": ...
|
||||
"window_size": ...
|
||||
}
|
||||
}
|
||||
}
|
||||
----
|
||||
// NOTCONSOLE
|
||||
|
||||
==== Using `from` and `size` with a retriever tree
|
||||
|
||||
The <<search-from-param, `from`>> and <<search-size-param, `size`>>
|
||||
parameters are provided globally as part of the general
|
||||
<<search-search, search API>>. They are applied to all retrievers in a
|
||||
retriever tree unless a specific retriever overrides the `size` parameter
|
||||
using a different parameter such as `window_size`. Though, the final
|
||||
search hits are always limited to `size`.
|
||||
|
||||
==== Using aggregations with a retriever tree
|
||||
|
||||
<<search-aggregations, Aggregations>> are globally specified as part of a search request.
|
||||
The query used for an aggregation is the combination of all leaf retrievers as `should`
|
||||
clauses in a <<query-dsl-bool-query, boolean query>>.
|
||||
|
||||
==== Restrictions on search parameters when specifying a retriever
|
||||
|
||||
When a retriever is specified as part of a search the following elements are not allowed
|
||||
at the top-level and instead are only allowed as elements of specific retrievers:
|
||||
|
||||
* <<request-body-search-query, `query`>>
|
||||
* <<search-api-knn, `knn`>>
|
||||
* <<search-after, `search_after`>>
|
||||
* <<request-body-search-terminate-after, `terminate_after`>>
|
||||
* <<search-sort-param, `sort`>>
|
||||
* <<rescore, `rescore`>>
|
||||
* <<search-api-min-score, `min_score`>>
|
|
@ -32,28 +32,20 @@ return score
|
|||
==== Reciprocal rank fusion API
|
||||
|
||||
You can use RRF as part of a <<search-search, search>> to combine and rank
|
||||
documents using result sets from a combination of
|
||||
<<request-body-search-query, query>>,
|
||||
<<request-body-sub-searches, sub searches>>, and/or
|
||||
<<search-api-knn, knn searches>>. A minimum of 2 results sets
|
||||
is required for ranking from the specified sources.
|
||||
documents using separate sets of top documents (result sets) from a
|
||||
combination of <<retriever, child retrievers>> using an
|
||||
<<rrf-retriever, RRF retriever>>. A minimum of *two* child retrievers is
|
||||
required for ranking.
|
||||
|
||||
The `rrf` parameter is an optional object defined as part of a search request's
|
||||
<<request-body-rank, rank parameter>>. The `rrf` object contains the following
|
||||
parameters:
|
||||
An RRF retriever is an optional object defined as part of a search request's
|
||||
<<request-body-retriever, retriever parameter>>. The RRF retriever object contains
|
||||
the following parameters:
|
||||
|
||||
`rank_constant`::
|
||||
(Optional, integer) This value determines how much influence documents in individual
|
||||
result sets per query have over the final ranked result set. A higher value indicates
|
||||
that lower ranked documents have more influence. This value must be greater than or
|
||||
equal to `1`. Defaults to `60`.
|
||||
include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=rrf-retrievers]
|
||||
|
||||
`window_size`::
|
||||
(Optional, integer) This value determines the size of the individual result sets per
|
||||
query. A higher value will improve result relevance at the cost of performance. The final
|
||||
ranked result set is pruned down to the search request's <<search-size-param, size>>.
|
||||
`window_size` must be greater than or equal to `size` and greater than or equal to `1`.
|
||||
Defaults to `100`.
|
||||
include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=rrf-rank-constant]
|
||||
|
||||
include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=rrf-window-size]
|
||||
|
||||
An example request using RRF:
|
||||
|
||||
|
@ -61,19 +53,27 @@ An example request using RRF:
|
|||
----
|
||||
GET example-index/_search
|
||||
{
|
||||
"query": {
|
||||
"term": {
|
||||
"text": "shoes"
|
||||
}
|
||||
},
|
||||
"knn": {
|
||||
"field": "vector",
|
||||
"query_vector": [1.25, 2, 3.5],
|
||||
"k": 50,
|
||||
"num_candidates": 100
|
||||
},
|
||||
"rank": {
|
||||
"rrf": {
|
||||
"retriever": {
|
||||
"rrf": { <3>
|
||||
"retrievers": [
|
||||
{
|
||||
"standard": { <2>
|
||||
"query": {
|
||||
"term": {
|
||||
"text": "shoes"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"knn": { <1>
|
||||
"field": "vector",
|
||||
"query_vector": [1.25, 2, 3.5],
|
||||
"k": 50,
|
||||
"num_candidates": 100
|
||||
}
|
||||
}
|
||||
],
|
||||
"window_size": 50,
|
||||
"rank_constant": 20
|
||||
}
|
||||
|
@ -82,10 +82,17 @@ GET example-index/_search
|
|||
----
|
||||
// TEST[skip:example fragment]
|
||||
|
||||
In the above example, we first execute the kNN search to get its global top 50 results.
|
||||
Then we execute the query to get its global top 50 results. Afterwards, on a coordinating
|
||||
node, we combine the knn search results with the query results and rank them based on the
|
||||
RRF method to get the final top 10 results.
|
||||
In the above example, we execute the `knn` and `standard` retrievers
|
||||
independently of each other. Then we use the `rrf` retriever to combine
|
||||
the results.
|
||||
|
||||
<1> First, we execute the kNN search specified by the `knn` retriever to
|
||||
get its global top 50 results.
|
||||
<2> Second, we execute the query specified by the `standard` retriever to get
|
||||
its global top 50 results.
|
||||
<3> Then, on a coordinating node, we combine the kNN search top documents with
|
||||
the query top documents and rank them based on the RRF formula using parameters from
|
||||
the `rrf` retriever to get the combined top documents using the default `size` of `10`.
|
||||
|
||||
Note that if `k` from a knn search is larger than `window_size`, the results are
|
||||
truncated to `window_size`. If `k` is smaller than `window_size`, the results are
|
||||
|
@ -94,13 +101,12 @@ truncated to `window_size`. If `k` is smaller than `window_size`, the results ar
|
|||
[[rrf-supported-features]]
|
||||
==== Reciprocal rank fusion supported features
|
||||
|
||||
RRF does support:
|
||||
The `rrf` retriever supports:
|
||||
|
||||
* <<request-body-sub-searches, sub searches>>
|
||||
* <<search-aggregations, aggregations>>
|
||||
* <<search-from-param, from>>
|
||||
|
||||
RRF does not currently support:
|
||||
The `rrf` retriever does not currently support:
|
||||
|
||||
* <<search-api-scroll-query-param, scroll>>
|
||||
* <<search-api-pit, point in time>>
|
||||
|
@ -112,42 +118,48 @@ RRF does not currently support:
|
|||
* <<request-body-search-explain, explain>>
|
||||
* <<profiling-queries, profiling>>
|
||||
|
||||
Using unsupported features as part of a search with RRF results
|
||||
Using unsupported features as part of a search with an `rrf` retriever results
|
||||
in an exception.
|
||||
|
||||
[[rrf-using-sub-searches]]
|
||||
==== Reciprocal rank fusion using sub searches
|
||||
[[rrf-using-multiple-standard-retrievers]]
|
||||
==== Reciprocal rank fusion using multiple standard retrievers
|
||||
|
||||
<<request-body-sub-searches, Sub searches>> provides a way to
|
||||
combine and rank multiple searches using RRF.
|
||||
The `rrf` retriever provides a way to combine and rank multiple
|
||||
`standard` retrievers. A primary use case is combining top documents
|
||||
from a traditional BM25 query and an <<semantic-search-elser, ELSER>>
|
||||
query to achieve improved relevance.
|
||||
|
||||
An example request using RRF with sub searches:
|
||||
An example request using RRF with multiple standard retrievers:
|
||||
|
||||
[source,console]
|
||||
----
|
||||
GET example-index/_search
|
||||
{
|
||||
"sub_searches": [
|
||||
{
|
||||
"query": {
|
||||
"term": {
|
||||
"text": "blue shoes sale"
|
||||
"retriever": {
|
||||
"rrf": { <3>
|
||||
"retrievers": [
|
||||
{
|
||||
"standard": { <1>
|
||||
"query": {
|
||||
"term": {
|
||||
"text": "blue shoes sale"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"standard": { <2>
|
||||
"query": {
|
||||
"text_expansion":{
|
||||
"ml.tokens":{
|
||||
"model_id":"my_elser_model",
|
||||
"model_text":"What blue shoes are on sale?"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"query": {
|
||||
"text_expansion":{
|
||||
"ml.tokens":{
|
||||
"model_id":"my_elser_model",
|
||||
"model_text":"What blue shoes are on sale?"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"rank": {
|
||||
"rrf": {
|
||||
],
|
||||
"window_size": 50,
|
||||
"rank_constant": 20
|
||||
}
|
||||
|
@ -156,17 +168,31 @@ GET example-index/_search
|
|||
----
|
||||
// TEST[skip:example fragment]
|
||||
|
||||
In the above example, we execute each of the two sub searches
|
||||
independently of each other. First we run the term query for
|
||||
`blue shoes sales` using the standard BM25 scoring algorithm. Then
|
||||
we run the text expansion query for `What blue shoes are on sale?`
|
||||
In the above example, we execute each of the two `standard` retrievers
|
||||
independently of each other. Then we use the `rrf` retriever to combine
|
||||
the results.
|
||||
|
||||
<1> First we run the `standard` retriever
|
||||
specifying a term query for `blue shoes sales` using the standard BM25
|
||||
scoring algorithm.
|
||||
<2> Next we run the `standard` retriever specifying a
|
||||
text expansion query for `What blue shoes are on sale?`
|
||||
using our <<semantic-search-elser, ELSER>> scoring algorithm.
|
||||
RRF allows us to combine the two results sets generated by completely
|
||||
independent scoring algorithms with equal weighting. Not only does this
|
||||
remove the need to figure out what the appropriate weighting would be
|
||||
using linear combination, but RRF is also shown to give improved
|
||||
<3> The `rrf` retriever allows us to combine the two top documents sets
|
||||
generated by completely independent scoring algorithms with equal weighting.
|
||||
|
||||
Not only does this remove the need to figure out what the appropriate
|
||||
weighting is using linear combination, but RRF is also shown to give improved
|
||||
relevance over either query individually.
|
||||
|
||||
[[rrf-using-sub-searches]]
|
||||
==== Reciprocal rank fusion using sub searches
|
||||
|
||||
RRF using sub searches is no longer supported. Use the
|
||||
<<retriever, retriever API>> instead. See
|
||||
<<rrf-using-multiple-standard-retrievers, using multiple standard retrievers>>
|
||||
for an example.
|
||||
|
||||
[[rrf-full-example]]
|
||||
==== Reciprocal rank fusion full example
|
||||
|
||||
|
@ -179,7 +205,7 @@ to explain.
|
|||
----
|
||||
PUT example-index
|
||||
{
|
||||
"mappings": {
|
||||
"mappings": {
|
||||
"properties": {
|
||||
"text" : {
|
||||
"type" : "text"
|
||||
|
@ -234,26 +260,35 @@ POST example-index/_refresh
|
|||
----
|
||||
// TEST
|
||||
|
||||
We now execute a search using RRF with a query, a kNN search, and
|
||||
We now execute a search using an `rrf` retriever with a `standard` retriever
|
||||
specifying a BM25 query, a `knn` retriever specifying a kNN search, and
|
||||
a terms aggregation.
|
||||
|
||||
[source,console]
|
||||
----
|
||||
GET example-index/_search
|
||||
{
|
||||
"query": {
|
||||
"term": {
|
||||
"text": "rrf"
|
||||
}
|
||||
},
|
||||
"knn": {
|
||||
"field": "vector",
|
||||
"query_vector": [3],
|
||||
"k": 5,
|
||||
"num_candidates": 5
|
||||
},
|
||||
"rank": {
|
||||
"retriever": {
|
||||
"rrf": {
|
||||
"retrievers": [
|
||||
{
|
||||
"standard": {
|
||||
"query": {
|
||||
"term": {
|
||||
"text": "rrf"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"knn": {
|
||||
"field": "vector",
|
||||
"query_vector": [3],
|
||||
"k": 5,
|
||||
"num_candidates": 5
|
||||
}
|
||||
}
|
||||
],
|
||||
"window_size": 5,
|
||||
"rank_constant": 1
|
||||
}
|
||||
|
@ -351,10 +386,11 @@ use `_rank` to show our top-ranked documents.
|
|||
// TESTRESPONSE[s/: \.\.\./: $body.$_path/]
|
||||
|
||||
Let's break down how these hits were ranked. We
|
||||
start by running the query and the kNN search
|
||||
separately to collect what their individual hits are.
|
||||
start by running the `standard` retriever specifying a query
|
||||
and the `knn` retriever specifying a kNN search separately to
|
||||
collect what their individual hits are.
|
||||
|
||||
First, we look at the hits for the query.
|
||||
First, we look at the hits for the query from the `standard` retriever.
|
||||
|
||||
[source,console-result]
|
||||
----
|
||||
|
@ -407,7 +443,7 @@ First, we look at the hits for the query.
|
|||
<4> rank 4, `_id` 1
|
||||
|
||||
Note that our first hit doesn't have a value for the `vector` field. Now,
|
||||
we look at the results for the kNN search.
|
||||
we look at the results for the kNN search from the `knn` retriever.
|
||||
|
||||
[source,console-result]
|
||||
----
|
||||
|
@ -460,7 +496,8 @@ we look at the results for the kNN search.
|
|||
<4> rank 4, `_id` 5
|
||||
|
||||
We can now take the two individually ranked result sets and apply the
|
||||
RRF formula to them to get our final ranking.
|
||||
RRF formula to them using parameters from the `rrf` retriever to get
|
||||
our final ranking.
|
||||
|
||||
[source,python]
|
||||
----
|
||||
|
@ -478,4 +515,3 @@ truncating the bottom `2` docs in our RRF result set with a `size` of `3`.
|
|||
We end with `_id: 3` as `_rank: 1`, `_id: 2` as `_rank: 2`, and
|
||||
`_id: 4` as `_rank: 3`. This ranking matches the result set from the
|
||||
original RRF search as expected.
|
||||
|
||||
|
|
|
@ -215,31 +215,35 @@ PUT _application/search_application/my-search-app
|
|||
"lang": "mustache",
|
||||
"source": """
|
||||
{
|
||||
"sub_searches": [
|
||||
{{#text_fields}}
|
||||
{
|
||||
"query": {
|
||||
"match": {
|
||||
"{{.}}": "{{query_string}}"
|
||||
}
|
||||
}
|
||||
},
|
||||
{{/text_fields}}
|
||||
{{#elser_fields}}
|
||||
{
|
||||
"query": {
|
||||
"text_expansion": {
|
||||
"ml.inference.{{.}}_expanded.predicted_value": {
|
||||
"model_text": "{{query_string}}",
|
||||
"model_id": "<elser_model_id>"
|
||||
"retriever": {
|
||||
"rrf": {
|
||||
"retrievers": [
|
||||
{{#text_fields}}
|
||||
{
|
||||
"standard": {
|
||||
"query": {
|
||||
"match": {
|
||||
"{{.}}": "{{query_string}}"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{{/text_fields}}
|
||||
{{#elser_fields}}
|
||||
{
|
||||
"standard": {
|
||||
"query": {
|
||||
"text_expansion": {
|
||||
"ml.inference.{{.}}_expanded.predicted_value": {
|
||||
"model_text": "{{query_string}}",
|
||||
"model_id": "<elser_model_id>"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{{/elser_fields}}
|
||||
],
|
||||
"rank": {
|
||||
"rrf": {
|
||||
{{/elser_fields}}
|
||||
],
|
||||
"window_size": {{rrf.window_size}},
|
||||
"rank_constant": {{rrf.rank_constant}}
|
||||
}
|
||||
|
|
|
@ -558,12 +558,20 @@ Period of time used to extend the life of the PIT.
|
|||
(Optional, <<query-dsl,query object>>) Defines the search definition using the
|
||||
<<query-dsl,Query DSL>>.
|
||||
|
||||
[[request-body-retriever]]
|
||||
`retriever`::
|
||||
preview:[]
|
||||
(Optional, <<retriever, retriever object>>) Defines a top-level retriever to specify
|
||||
a desired set of top documents instead of a standard query or knn search.
|
||||
|
||||
[[request-body-rank]]
|
||||
`rank`::
|
||||
preview:[]
|
||||
This param is in technical preview and may change in the future. The syntax will
|
||||
likely change before GA.
|
||||
+
|
||||
This parameter is deprecated and will be removed. Use <<retriever, retriever>> instead.
|
||||
+
|
||||
(Optional, object)
|
||||
Defines a method for combining and ranking result sets from
|
||||
a combination of <<request-body-search-query, query>>,
|
||||
|
@ -731,6 +739,8 @@ preview:[]
|
|||
This param is in technical preview and may change in the future. The syntax will
|
||||
likely change before GA.
|
||||
+
|
||||
This parameter is deprecated and will be removed. Use <<retriever, retriever>> instead.
|
||||
+
|
||||
(Optional, array of objects)
|
||||
An array of `sub_search` objects where each `sub_search` is evaluated
|
||||
independently, and their result sets are later combined as part of
|
||||
|
@ -752,6 +762,7 @@ with a top-level <<request-body-search-query, `query`>> element.
|
|||
----
|
||||
// NOTCONSOLE
|
||||
|
||||
[[request-body-search-terminate-after]]
|
||||
include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=terminate_after]
|
||||
+
|
||||
Defaults to `0`, which does not terminate query execution early.
|
||||
|
|
|
@ -1,36 +1,41 @@
|
|||
// tag::elser[]
|
||||
|
||||
Hybrid search between a semantic and lexical query can be achieved by using a
|
||||
`sub_searches` clause in your search request. In the `sub_searches` clause,
|
||||
provide a `text_expansion` query and a full-text query. Next to the
|
||||
`sub_searches` clause, also provide a <<request-body-rank,`rank`>> clause with
|
||||
the `rrf` parameter to rank documents using reciprocal rank fusion.
|
||||
Hybrid search between a semantic and lexical query can be achieved by using an
|
||||
<<rrf-retriever, `rrf` retriever>> as part of your search request. Provide a
|
||||
`text_expansion` query and a full-text query as
|
||||
<<standard-retriever, `standard` retrievers>> for the `rrf` retriever. The `rrf`
|
||||
retriever uses <<rrf, reciprocal rank fusion>> to rank the top documents.
|
||||
|
||||
[source,console]
|
||||
----
|
||||
GET my-index/_search
|
||||
{
|
||||
"sub_searches": [
|
||||
{
|
||||
"query": {
|
||||
"match": {
|
||||
"my_text_field": "the query string"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"query": {
|
||||
"text_expansion": {
|
||||
"my_tokens": {
|
||||
"model_id": ".elser_model_2",
|
||||
"model_text": "the query string"
|
||||
"retriever": {
|
||||
"rrf": {
|
||||
"retrievers": [
|
||||
{
|
||||
"standard": {
|
||||
"query": {
|
||||
"match": {
|
||||
"my_text_field": "the query string"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"standard": {
|
||||
"query": {
|
||||
"text_expansion": {
|
||||
"my_tokens": {
|
||||
"model_id": ".elser_model_2",
|
||||
"model_text": "the query string"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"rank": {
|
||||
"rrf": {}
|
||||
}
|
||||
}
|
||||
----
|
||||
|
@ -43,36 +48,44 @@ GET my-index/_search
|
|||
|
||||
Hybrid search between a semantic and lexical query can be achieved by providing:
|
||||
|
||||
* a `query` clause for the full-text query;
|
||||
* a `knn` clause with the kNN search that queries the dense vector field;
|
||||
* and a `rank` clause with the `rrf` parameter to rank documents using
|
||||
reciprocal rank fusion.
|
||||
* an `rrf` retriever to rank top documents using <<rrf, reciprocal rank fusion>>
|
||||
* a `standard` retriever as a child retriever with `query` clause for the full-text query
|
||||
* a `knn` retriever as a child retriever with the kNN search that queries the dense vector field
|
||||
|
||||
[source,console]
|
||||
----
|
||||
GET my-index/_search
|
||||
{
|
||||
"query": {
|
||||
"match": {
|
||||
"my_text_field": "the query string"
|
||||
"retriever": {
|
||||
"rrf": {
|
||||
"retrievers": [
|
||||
{
|
||||
"standard": {
|
||||
"query": {
|
||||
"match": {
|
||||
"my_text_field": "the query string"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"knn": {
|
||||
"field": "text_embedding.predicted_value",
|
||||
"k": 10,
|
||||
"num_candidates": 100,
|
||||
"query_vector_builder": {
|
||||
"text_embedding": {
|
||||
"model_id": "sentence-transformers__msmarco-minilm-l-12-v3",
|
||||
"model_text": "the query string"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"knn": {
|
||||
"field": "text_embedding.predicted_value",
|
||||
"k": 10,
|
||||
"num_candidates": 100,
|
||||
"query_vector_builder": {
|
||||
"text_embedding": {
|
||||
"model_id": "sentence-transformers__msmarco-minilm-l-12-v3",
|
||||
"model_text": "the query string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"rank": {
|
||||
"rrf": {}
|
||||
}
|
||||
}
|
||||
----
|
||||
// TEST[skip:TBD]
|
||||
|
||||
// end::dense-vector[]
|
||||
// end::dense-vector[]
|
||||
|
|
|
@ -0,0 +1,519 @@
|
|||
setup:
|
||||
- skip:
|
||||
version: ' - 8.13.99'
|
||||
reason: 'standard retriever added in 8.14'
|
||||
- do:
|
||||
indices.create:
|
||||
index: animals
|
||||
body:
|
||||
settings:
|
||||
number_of_shards: 1
|
||||
number_of_replicas: 0
|
||||
mappings:
|
||||
properties:
|
||||
type:
|
||||
type: keyword
|
||||
name:
|
||||
type: text
|
||||
fields:
|
||||
raw:
|
||||
type: keyword
|
||||
color:
|
||||
type: keyword
|
||||
count:
|
||||
type: integer
|
||||
|
||||
- do:
|
||||
bulk:
|
||||
refresh: true
|
||||
index: animals
|
||||
body:
|
||||
- '{"index": {"_id": 1 }}'
|
||||
- '{"type": "domestic", "name": "cow", "color": "brown", "count": 1}'
|
||||
- '{"index": {"_id": 2 }}'
|
||||
- '{"type": "domestic", "name": "cow cow", "color": "spotted", "count": 2}'
|
||||
- '{"index": {"_id": 3 }}'
|
||||
- '{"type": "domestic", "name": "cow cow cow", "color": "spotted", "count": 3}'
|
||||
- '{"index": {"_id": 4 }}'
|
||||
- '{"type": "domestic", "name": "pig", "color": "pink", "count": 4}'
|
||||
- '{"index": {"_id": 5 }}'
|
||||
- '{"type": "domestic", "name": "pig pig", "color": "pink", "count": 5}'
|
||||
- '{"index": {"_id": 6 }}'
|
||||
- '{"type": "domestic", "name": "pig pig pig", "color": "spotted", "count": 6}'
|
||||
- '{"index": {"_id": 7 }}'
|
||||
- '{"type": "domestic", "name": "chicken", "color": "white", "count": 7}'
|
||||
- '{"index": {"_id": 8 }}'
|
||||
- '{"type": "domestic", "name": "chicken chicken", "color": "brown", "count": 8}'
|
||||
- '{"index": {"_id": 9 }}'
|
||||
- '{"type": "domestic", "name": "chicken chicken chicken", "color": "spotted", "count": 9}'
|
||||
- '{"index": {"_id": 10 }}'
|
||||
- '{"type": "wild", "name": "coyote", "color": "gray", "count": 10}'
|
||||
- '{"index": {"_id": 11 }}'
|
||||
- '{"type": "wild", "name": "coyote coyote", "color": "gray", "count": 11}'
|
||||
- '{"index": {"_id": 12 }}'
|
||||
- '{"type": "wild", "name": "coyote coyote coyote", "color": "white", "count": 12}'
|
||||
- '{"index": {"_id": 13 }}'
|
||||
- '{"type": "wild", "name": "rabbit", "color": "brown", "count": 13}'
|
||||
- '{"index": {"_id": 14 }}'
|
||||
- '{"type": "wild", "name": "rabbit rabbit", "color": "spotted", "count": 14}'
|
||||
- '{"index": {"_id": 15 }}'
|
||||
- '{"type": "wild", "name": "rabbit rabbit rabbit", "color": "white", "count": 15}'
|
||||
|
||||
---
|
||||
"standard retriever basic":
|
||||
- do:
|
||||
search:
|
||||
index: animals
|
||||
body:
|
||||
fields: [ "name", "count" ]
|
||||
retriever:
|
||||
standard:
|
||||
query:
|
||||
match:
|
||||
name: "cow"
|
||||
|
||||
- match: {hits.total.value: 3}
|
||||
|
||||
- match: {hits.hits.0._id: "3"}
|
||||
- match: {hits.hits.0.fields.name.0: "cow cow cow"}
|
||||
- match: {hits.hits.0.fields.count.0: 3}
|
||||
|
||||
- match: {hits.hits.1._id: "2"}
|
||||
- match: {hits.hits.1.fields.name.0: "cow cow"}
|
||||
- match: {hits.hits.1.fields.count.0: 2}
|
||||
|
||||
- match: {hits.hits.2._id: "1"}
|
||||
- match: {hits.hits.2.fields.name.0: "cow"}
|
||||
- match: {hits.hits.2.fields.count.0: 1}
|
||||
|
||||
---
|
||||
"standard retriever single sort":
|
||||
- do:
|
||||
search:
|
||||
index: animals
|
||||
body:
|
||||
fields: [ "name", "count" ]
|
||||
retriever:
|
||||
standard:
|
||||
query:
|
||||
term:
|
||||
color: "spotted"
|
||||
sort: [
|
||||
{
|
||||
name.raw: "asc"
|
||||
}
|
||||
]
|
||||
|
||||
- match: {hits.total.value: 5}
|
||||
|
||||
- match: {hits.hits.0._id: "9"}
|
||||
- match: {hits.hits.0.fields.name.0: "chicken chicken chicken"}
|
||||
- match: {hits.hits.0.fields.count.0: 9}
|
||||
|
||||
- match: {hits.hits.1._id: "2"}
|
||||
- match: {hits.hits.1.fields.name.0: "cow cow"}
|
||||
- match: {hits.hits.1.fields.count.0: 2}
|
||||
|
||||
- match: {hits.hits.2._id: "3"}
|
||||
- match: {hits.hits.2.fields.name.0: "cow cow cow"}
|
||||
- match: {hits.hits.2.fields.count.0: 3}
|
||||
|
||||
- match: {hits.hits.3._id: "6"}
|
||||
- match: {hits.hits.3.fields.name.0: "pig pig pig"}
|
||||
- match: {hits.hits.3.fields.count.0: 6}
|
||||
|
||||
- match: {hits.hits.4._id: "14"}
|
||||
- match: {hits.hits.4.fields.name.0: "rabbit rabbit"}
|
||||
- match: {hits.hits.4.fields.count.0: 14}
|
||||
|
||||
---
|
||||
"standard retriever multi sort":
|
||||
- do:
|
||||
search:
|
||||
index: animals
|
||||
body:
|
||||
fields: [ "name", "count" ]
|
||||
retriever:
|
||||
standard:
|
||||
query:
|
||||
bool:
|
||||
should: [
|
||||
{
|
||||
term: {
|
||||
color: "spotted"
|
||||
}
|
||||
},
|
||||
{
|
||||
term: {
|
||||
color: "pink"
|
||||
}
|
||||
}
|
||||
]
|
||||
sort: [
|
||||
{
|
||||
color: "asc"
|
||||
},
|
||||
{
|
||||
count: "desc"
|
||||
}
|
||||
]
|
||||
|
||||
- match: {hits.total.value: 7}
|
||||
|
||||
- match: {hits.hits.0._id: "5"}
|
||||
- match: {hits.hits.0.fields.name.0: "pig pig"}
|
||||
- match: {hits.hits.0.fields.count.0: 5}
|
||||
|
||||
- match: {hits.hits.1._id: "4"}
|
||||
- match: {hits.hits.1.fields.name.0: "pig"}
|
||||
- match: {hits.hits.1.fields.count.0: 4}
|
||||
|
||||
- match: {hits.hits.2._id: "14"}
|
||||
- match: {hits.hits.2.fields.name.0: "rabbit rabbit"}
|
||||
- match: {hits.hits.2.fields.count.0: 14}
|
||||
|
||||
- match: {hits.hits.3._id: "9"}
|
||||
- match: {hits.hits.3.fields.name.0: "chicken chicken chicken"}
|
||||
- match: {hits.hits.3.fields.count.0: 9}
|
||||
|
||||
- match: {hits.hits.4._id: "6"}
|
||||
- match: {hits.hits.4.fields.name.0: "pig pig pig"}
|
||||
- match: {hits.hits.4.fields.count.0: 6}
|
||||
|
||||
- match: {hits.hits.5._id: "3"}
|
||||
- match: {hits.hits.5.fields.name.0: "cow cow cow"}
|
||||
- match: {hits.hits.5.fields.count.0: 3}
|
||||
|
||||
- match: {hits.hits.6._id: "2"}
|
||||
- match: {hits.hits.6.fields.name.0: "cow cow"}
|
||||
- match: {hits.hits.6.fields.count.0: 2}
|
||||
|
||||
---
|
||||
"standard retriever filter":
|
||||
- do:
|
||||
search:
|
||||
index: animals
|
||||
body:
|
||||
fields: [ "name", "count" ]
|
||||
retriever:
|
||||
standard:
|
||||
filter:
|
||||
bool:
|
||||
must_not:
|
||||
term:
|
||||
color: "spotted"
|
||||
query:
|
||||
match:
|
||||
name: "cow"
|
||||
|
||||
- match: {hits.total.value: 1}
|
||||
|
||||
- match: {hits.hits.0._id: "1"}
|
||||
- match: {hits.hits.0.fields.name.0: "cow"}
|
||||
- match: {hits.hits.0.fields.count.0: 1}
|
||||
|
||||
---
|
||||
"standard retriever multi filter":
|
||||
- do:
|
||||
search:
|
||||
index: animals
|
||||
body:
|
||||
fields: [ "name", "count" ]
|
||||
retriever:
|
||||
standard:
|
||||
filter: [
|
||||
{
|
||||
match: {
|
||||
name: "cow"
|
||||
}
|
||||
},
|
||||
{
|
||||
range: {
|
||||
count: {
|
||||
gt: 1,
|
||||
lt: 3
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
query:
|
||||
term:
|
||||
color: "spotted"
|
||||
|
||||
- match: {hits.total.value: 1}
|
||||
|
||||
- match: {hits.hits.0._id: "2"}
|
||||
- match: {hits.hits.0.fields.name.0: "cow cow"}
|
||||
- match: {hits.hits.0.fields.count.0: 2}
|
||||
|
||||
---
|
||||
"standard retriever filter no query":
|
||||
- do:
|
||||
search:
|
||||
index: animals
|
||||
body:
|
||||
fields: [ "name", "count" ]
|
||||
retriever:
|
||||
standard:
|
||||
filter: [
|
||||
{
|
||||
match: {
|
||||
name: "cow"
|
||||
}
|
||||
},
|
||||
{
|
||||
range: {
|
||||
count: {
|
||||
gt: 1,
|
||||
lt: 4
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
sort: [
|
||||
{
|
||||
count: "desc"
|
||||
}
|
||||
]
|
||||
|
||||
- match: {hits.total.value: 2}
|
||||
|
||||
- match: { hits.hits.0._id: "3" }
|
||||
- match: { hits.hits.0.fields.name.0: "cow cow cow" }
|
||||
- match: { hits.hits.0.fields.count.0: 3 }
|
||||
|
||||
- match: {hits.hits.1._id: "2"}
|
||||
- match: {hits.hits.1.fields.name.0: "cow cow"}
|
||||
- match: {hits.hits.1.fields.count.0: 2}
|
||||
|
||||
---
|
||||
"standard retriever search after":
|
||||
- do:
|
||||
search:
|
||||
index: animals
|
||||
body:
|
||||
size: 3
|
||||
fields: [ "name", "count" ]
|
||||
retriever:
|
||||
standard:
|
||||
query:
|
||||
bool:
|
||||
should: [
|
||||
{
|
||||
term: {
|
||||
color: "spotted"
|
||||
}
|
||||
},
|
||||
{
|
||||
term: {
|
||||
color: "pink"
|
||||
}
|
||||
}
|
||||
]
|
||||
sort: [
|
||||
{
|
||||
count: "desc"
|
||||
}
|
||||
]
|
||||
|
||||
- match: {hits.total.value: 7}
|
||||
|
||||
- match: {hits.hits.0._id: "14"}
|
||||
- match: {hits.hits.0.fields.name.0: "rabbit rabbit"}
|
||||
- match: {hits.hits.0.fields.count.0: 14}
|
||||
|
||||
- match: {hits.hits.1._id: "9"}
|
||||
- match: {hits.hits.1.fields.name.0: "chicken chicken chicken"}
|
||||
- match: {hits.hits.1.fields.count.0: 9}
|
||||
|
||||
- match: {hits.hits.2._id: "6"}
|
||||
- match: {hits.hits.2.fields.name.0: "pig pig pig"}
|
||||
- match: {hits.hits.2.fields.count.0: 6}
|
||||
|
||||
- do:
|
||||
search:
|
||||
index: animals
|
||||
body:
|
||||
size: 3
|
||||
fields: [ "name", "count" ]
|
||||
retriever:
|
||||
standard:
|
||||
search_after: [ 6 ]
|
||||
query:
|
||||
bool:
|
||||
should: [
|
||||
{
|
||||
term: {
|
||||
color: "spotted"
|
||||
}
|
||||
},
|
||||
{
|
||||
term: {
|
||||
color: "pink"
|
||||
}
|
||||
}
|
||||
]
|
||||
sort: [
|
||||
{
|
||||
count: "desc"
|
||||
}
|
||||
]
|
||||
|
||||
- match: {hits.total.value: 7}
|
||||
|
||||
- match: {hits.hits.0._id: "5"}
|
||||
- match: {hits.hits.0.fields.name.0: "pig pig"}
|
||||
- match: {hits.hits.0.fields.count.0: 5}
|
||||
|
||||
- match: {hits.hits.1._id: "4"}
|
||||
- match: {hits.hits.1.fields.name.0: "pig"}
|
||||
- match: {hits.hits.1.fields.count.0: 4}
|
||||
|
||||
- match: {hits.hits.2._id: "3"}
|
||||
- match: {hits.hits.2.fields.name.0: "cow cow cow"}
|
||||
- match: {hits.hits.2.fields.count.0: 3}
|
||||
|
||||
- do:
|
||||
search:
|
||||
index: animals
|
||||
body:
|
||||
size: 3
|
||||
fields: [ "name", "count" ]
|
||||
retriever:
|
||||
standard:
|
||||
search_after: [ 3 ]
|
||||
query:
|
||||
bool:
|
||||
should: [
|
||||
{
|
||||
term: {
|
||||
color: "spotted"
|
||||
}
|
||||
},
|
||||
{
|
||||
term: {
|
||||
color: "pink"
|
||||
}
|
||||
}
|
||||
]
|
||||
sort: [
|
||||
{
|
||||
count: "desc"
|
||||
}
|
||||
]
|
||||
|
||||
- match: {hits.total.value: 7}
|
||||
|
||||
- match: {hits.hits.0._id: "2"}
|
||||
- match: {hits.hits.0.fields.name.0: "cow cow"}
|
||||
- match: {hits.hits.0.fields.count.0: 2}
|
||||
|
||||
---
|
||||
"standard retriever terminate after":
|
||||
- do:
|
||||
search:
|
||||
index: animals
|
||||
body:
|
||||
fields: [ "name", "count" ]
|
||||
retriever:
|
||||
standard:
|
||||
filter:
|
||||
bool:
|
||||
must_not:
|
||||
match:
|
||||
name: "cow"
|
||||
sort: [
|
||||
{
|
||||
count: "asc"
|
||||
}
|
||||
]
|
||||
terminate_after: 3
|
||||
|
||||
- match: {hits.total.value: 3}
|
||||
|
||||
- match: {hits.hits.0._id: "4"}
|
||||
- match: {hits.hits.0.fields.name.0: "pig"}
|
||||
- match: {hits.hits.0.fields.count.0: 4}
|
||||
|
||||
- match: {hits.hits.1._id: "5"}
|
||||
- match: {hits.hits.1.fields.name.0: "pig pig"}
|
||||
- match: {hits.hits.1.fields.count.0: 5}
|
||||
|
||||
- match: {hits.hits.2._id: "6"}
|
||||
- match: {hits.hits.2.fields.name.0: "pig pig pig"}
|
||||
- match: {hits.hits.2.fields.count.0: 6}
|
||||
|
||||
---
|
||||
"standard retriever min score":
|
||||
- do:
|
||||
search:
|
||||
index: animals
|
||||
body:
|
||||
fields: [ "name", "count" ]
|
||||
retriever:
|
||||
standard:
|
||||
query:
|
||||
script_score:
|
||||
query:
|
||||
match:
|
||||
name: "cow"
|
||||
script:
|
||||
source: " $('count', -1)"
|
||||
min_score: 1.5
|
||||
|
||||
- match: {hits.total.value: 2}
|
||||
|
||||
- match: {hits.hits.0._id: "3"}
|
||||
- match: {hits.hits.0.fields.name.0: "cow cow cow"}
|
||||
- match: {hits.hits.0.fields.count.0: 3}
|
||||
|
||||
- match: {hits.hits.1._id: "2"}
|
||||
- match: {hits.hits.1.fields.name.0: "cow cow"}
|
||||
- match: {hits.hits.1.fields.count.0: 2}
|
||||
|
||||
---
|
||||
"standard retriever collapse":
|
||||
- do:
|
||||
search:
|
||||
index: animals
|
||||
body:
|
||||
size: 15
|
||||
fields: [ "name", "count", "color" ]
|
||||
retriever:
|
||||
standard:
|
||||
query:
|
||||
match_all: {}
|
||||
collapse:
|
||||
field: "color"
|
||||
sort: [
|
||||
{
|
||||
count: "asc"
|
||||
}
|
||||
]
|
||||
|
||||
- match: {hits.total.value: 15}
|
||||
|
||||
- match: {hits.hits.0._id: "1"}
|
||||
- match: {hits.hits.0.fields.name.0: "cow"}
|
||||
- match: {hits.hits.0.fields.count.0: 1}
|
||||
- match: {hits.hits.0.fields.color.0: "brown"}
|
||||
|
||||
- match: {hits.hits.1._id: "2"}
|
||||
- match: {hits.hits.1.fields.name.0: "cow cow"}
|
||||
- match: {hits.hits.1.fields.count.0: 2}
|
||||
- match: {hits.hits.1.fields.color.0: "spotted"}
|
||||
|
||||
- match: {hits.hits.2._id: "4"}
|
||||
- match: {hits.hits.2.fields.name.0: "pig"}
|
||||
- match: {hits.hits.2.fields.count.0: 4}
|
||||
- match: {hits.hits.2.fields.color.0: "pink"}
|
||||
|
||||
- match: {hits.hits.3._id: "7" }
|
||||
- match: {hits.hits.3.fields.name.0: "chicken" }
|
||||
- match: {hits.hits.3.fields.count.0: 7 }
|
||||
- match: {hits.hits.3.fields.color.0: "white"}
|
||||
|
||||
- match: {hits.hits.4._id: "10"}
|
||||
- match: {hits.hits.4.fields.name.0: "coyote"}
|
||||
- match: {hits.hits.4.fields.count.0: 10}
|
||||
- match: {hits.hits.4.fields.color.0: "gray"}
|
|
@ -0,0 +1,73 @@
|
|||
setup:
|
||||
- skip:
|
||||
version: ' - 8.13.99'
|
||||
reason: 'kNN retriever added in 8.14'
|
||||
- do:
|
||||
indices.create:
|
||||
index: index1
|
||||
body:
|
||||
settings:
|
||||
number_of_shards: 1
|
||||
number_of_replicas: 0
|
||||
mappings:
|
||||
properties:
|
||||
name:
|
||||
type: keyword
|
||||
vector:
|
||||
type: dense_vector
|
||||
dims: 5
|
||||
index: true
|
||||
similarity: l2_norm
|
||||
|
||||
- do:
|
||||
bulk:
|
||||
refresh: true
|
||||
index: index1
|
||||
body:
|
||||
- '{"index": {"_id": 1 }}'
|
||||
- '{"name": "cow.jpg", "vector": [1, 1, 1, 1, 1]}'
|
||||
- '{"index": {"_id": 2}}'
|
||||
- '{"name": "moose.jpg", "vector": [2, 2, 2, 2, 2]}'
|
||||
- '{"index": {"_id": 3 }}'
|
||||
- '{"name": "rabbit.jpg", "vector": [3, 3, 3, 3, 3]}'
|
||||
|
||||
---
|
||||
"kNN retriever":
|
||||
- do:
|
||||
search:
|
||||
index: index1
|
||||
body:
|
||||
fields: [ "name" ]
|
||||
retriever:
|
||||
knn:
|
||||
field: vector
|
||||
query_vector: [2, 2, 2, 2, 3]
|
||||
k: 2
|
||||
num_candidates: 3
|
||||
|
||||
- match: {hits.hits.0._id: "2"}
|
||||
- match: {hits.hits.0.fields.name.0: "moose.jpg"}
|
||||
|
||||
- match: {hits.hits.1._id: "3"}
|
||||
- match: {hits.hits.1.fields.name.0: "rabbit.jpg"}
|
||||
|
||||
---
|
||||
"kNN retriever with filter":
|
||||
- do:
|
||||
search:
|
||||
index: index1
|
||||
body:
|
||||
fields: [ "name" ]
|
||||
retriever:
|
||||
knn:
|
||||
field: vector
|
||||
query_vector: [2, 2, 2, 2, 3]
|
||||
k: 2
|
||||
num_candidates: 3
|
||||
filter:
|
||||
term:
|
||||
name: "rabbit.jpg"
|
||||
|
||||
- match: {hits.total.value: 1}
|
||||
- match: {hits.hits.0._id: "3"}
|
||||
- match: {hits.hits.0.fields.name.0: "rabbit.jpg"}
|
|
@ -358,6 +358,7 @@ module org.elasticsearch.server {
|
|||
exports org.elasticsearch.search.query;
|
||||
exports org.elasticsearch.search.rank;
|
||||
exports org.elasticsearch.search.rescore;
|
||||
exports org.elasticsearch.search.retriever;
|
||||
exports org.elasticsearch.search.runtime;
|
||||
exports org.elasticsearch.search.searchafter;
|
||||
exports org.elasticsearch.search.slice;
|
||||
|
@ -415,7 +416,8 @@ module org.elasticsearch.server {
|
|||
org.elasticsearch.cluster.service.TransportFeatures,
|
||||
org.elasticsearch.cluster.metadata.MetadataFeatures,
|
||||
org.elasticsearch.rest.RestFeatures,
|
||||
org.elasticsearch.indices.IndicesFeatures;
|
||||
org.elasticsearch.indices.IndicesFeatures,
|
||||
org.elasticsearch.search.retriever.RetrieversFeatures;
|
||||
|
||||
uses org.elasticsearch.plugins.internal.SettingsExtension;
|
||||
uses RestExtension;
|
||||
|
|
|
@ -37,6 +37,8 @@ import org.elasticsearch.search.fetch.subphase.highlight.Highlighter;
|
|||
import org.elasticsearch.search.internal.ShardSearchRequest;
|
||||
import org.elasticsearch.search.rescore.Rescorer;
|
||||
import org.elasticsearch.search.rescore.RescorerBuilder;
|
||||
import org.elasticsearch.search.retriever.RetrieverBuilder;
|
||||
import org.elasticsearch.search.retriever.RetrieverParser;
|
||||
import org.elasticsearch.search.suggest.Suggest;
|
||||
import org.elasticsearch.search.suggest.Suggester;
|
||||
import org.elasticsearch.search.suggest.SuggestionBuilder;
|
||||
|
@ -111,6 +113,13 @@ public interface SearchPlugin {
|
|||
return emptyList();
|
||||
}
|
||||
|
||||
/**
|
||||
* The new {@link RetrieverBuilder}s defined by this plugin.
|
||||
*/
|
||||
default List<RetrieverSpec<?>> getRetrievers() {
|
||||
return emptyList();
|
||||
}
|
||||
|
||||
/**
|
||||
* The new {@link Query}s defined by this plugin.
|
||||
*/
|
||||
|
@ -256,6 +265,47 @@ public interface SearchPlugin {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Specification of custom {@link RetrieverBuilder}.
|
||||
*/
|
||||
class RetrieverSpec<RB extends RetrieverBuilder> {
|
||||
|
||||
private final ParseField name;
|
||||
private final RetrieverParser<RB> parser;
|
||||
|
||||
/**
|
||||
* Specification of custom {@link RetrieverBuilder}.
|
||||
*
|
||||
* @param name holds the names by which this retriever might be parsed. The {@link ParseField#getPreferredName()} is special as it
|
||||
* is the name by under which the reader is registered. So it is the name that the retriever should use as its
|
||||
* {@link NamedWriteable#getWriteableName()} too.
|
||||
* @param parser the parser the reads the retriever builder from xcontent
|
||||
*/
|
||||
public RetrieverSpec(ParseField name, RetrieverParser<RB> parser) {
|
||||
this.name = name;
|
||||
this.parser = parser;
|
||||
}
|
||||
|
||||
/**
|
||||
* Specification of custom {@link RetrieverBuilder}.
|
||||
*
|
||||
* @param name the name by which this retriever might be parsed or deserialized. Make sure that the retriever builder returns
|
||||
* this name for {@link NamedWriteable#getWriteableName()}.
|
||||
* @param parser the parser the reads the retriever builder from xcontent
|
||||
*/
|
||||
public RetrieverSpec(String name, RetrieverParser<RB> parser) {
|
||||
this(new ParseField(name), parser);
|
||||
}
|
||||
|
||||
public ParseField getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public RetrieverParser<RB> getParser() {
|
||||
return parser;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Specification of custom {@link Query}.
|
||||
*/
|
||||
|
|
|
@ -86,6 +86,7 @@ import org.elasticsearch.plugins.SearchPlugin.PipelineAggregationSpec;
|
|||
import org.elasticsearch.plugins.SearchPlugin.QuerySpec;
|
||||
import org.elasticsearch.plugins.SearchPlugin.QueryVectorBuilderSpec;
|
||||
import org.elasticsearch.plugins.SearchPlugin.RescorerSpec;
|
||||
import org.elasticsearch.plugins.SearchPlugin.RetrieverSpec;
|
||||
import org.elasticsearch.plugins.SearchPlugin.ScoreFunctionSpec;
|
||||
import org.elasticsearch.plugins.SearchPlugin.SearchExtSpec;
|
||||
import org.elasticsearch.plugins.SearchPlugin.SignificanceHeuristicSpec;
|
||||
|
@ -227,6 +228,10 @@ import org.elasticsearch.search.fetch.subphase.highlight.PlainHighlighter;
|
|||
import org.elasticsearch.search.internal.ShardSearchRequest;
|
||||
import org.elasticsearch.search.rescore.QueryRescorerBuilder;
|
||||
import org.elasticsearch.search.rescore.RescorerBuilder;
|
||||
import org.elasticsearch.search.retriever.KnnRetrieverBuilder;
|
||||
import org.elasticsearch.search.retriever.RetrieverBuilder;
|
||||
import org.elasticsearch.search.retriever.RetrieverParserContext;
|
||||
import org.elasticsearch.search.retriever.StandardRetrieverBuilder;
|
||||
import org.elasticsearch.search.sort.FieldSortBuilder;
|
||||
import org.elasticsearch.search.sort.GeoDistanceSortBuilder;
|
||||
import org.elasticsearch.search.sort.ScoreSortBuilder;
|
||||
|
@ -305,6 +310,7 @@ public class SearchModule {
|
|||
registerSuggesters(plugins);
|
||||
highlighters = setupHighlighters(settings, plugins);
|
||||
registerScoreFunctions(plugins);
|
||||
registerRetrieverParsers(plugins);
|
||||
registerQueryParsers(plugins);
|
||||
registerRescorers(plugins);
|
||||
registerSorts();
|
||||
|
@ -1039,6 +1045,13 @@ public class SearchModule {
|
|||
fetchSubPhases.add(requireNonNull(subPhase, "FetchSubPhase must not be null"));
|
||||
}
|
||||
|
||||
private void registerRetrieverParsers(List<SearchPlugin> plugins) {
|
||||
registerRetriever(new RetrieverSpec<>(StandardRetrieverBuilder.NAME, StandardRetrieverBuilder::fromXContent));
|
||||
registerRetriever(new RetrieverSpec<>(KnnRetrieverBuilder.NAME, KnnRetrieverBuilder::fromXContent));
|
||||
|
||||
registerFromPlugin(plugins, SearchPlugin::getRetrievers, this::registerRetriever);
|
||||
}
|
||||
|
||||
private void registerQueryParsers(List<SearchPlugin> plugins) {
|
||||
registerQuery(new QuerySpec<>(MatchQueryBuilder.NAME, MatchQueryBuilder::new, MatchQueryBuilder::fromXContent));
|
||||
registerQuery(new QuerySpec<>(MatchPhraseQueryBuilder.NAME, MatchPhraseQueryBuilder::new, MatchPhraseQueryBuilder::fromXContent));
|
||||
|
@ -1198,6 +1211,17 @@ public class SearchModule {
|
|||
);
|
||||
}
|
||||
|
||||
private void registerRetriever(RetrieverSpec<?> spec) {
|
||||
namedXContents.add(
|
||||
new NamedXContentRegistry.Entry(
|
||||
RetrieverBuilder.class,
|
||||
spec.getName(),
|
||||
(p, c) -> spec.getParser().fromXContent(p, (RetrieverParserContext) c),
|
||||
spec.getName().getForRestApiVersion()
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
private void registerQuery(QuerySpec<?> spec) {
|
||||
namedWriteables.add(new NamedWriteableRegistry.Entry(QueryBuilder.class, spec.getName().getPreferredName(), spec.getReader()));
|
||||
namedXContents.add(
|
||||
|
|
|
@ -39,6 +39,8 @@ import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
|
|||
import org.elasticsearch.search.internal.SearchContext;
|
||||
import org.elasticsearch.search.rank.RankBuilder;
|
||||
import org.elasticsearch.search.rescore.RescorerBuilder;
|
||||
import org.elasticsearch.search.retriever.RetrieverBuilder;
|
||||
import org.elasticsearch.search.retriever.RetrieverParserContext;
|
||||
import org.elasticsearch.search.searchafter.SearchAfterBuilder;
|
||||
import org.elasticsearch.search.slice.SliceBuilder;
|
||||
import org.elasticsearch.search.sort.ScoreSortBuilder;
|
||||
|
@ -70,6 +72,7 @@ import java.util.stream.Collectors;
|
|||
|
||||
import static java.util.Collections.emptyMap;
|
||||
import static org.elasticsearch.index.query.AbstractQueryBuilder.parseTopLevelQuery;
|
||||
import static org.elasticsearch.search.internal.SearchContext.DEFAULT_TERMINATE_AFTER;
|
||||
import static org.elasticsearch.search.internal.SearchContext.TRACK_TOTAL_HITS_ACCURATE;
|
||||
import static org.elasticsearch.search.internal.SearchContext.TRACK_TOTAL_HITS_DISABLED;
|
||||
|
||||
|
@ -120,6 +123,7 @@ public final class SearchSourceBuilder implements Writeable, ToXContentObject, R
|
|||
public static final ParseField SLICE = new ParseField("slice");
|
||||
public static final ParseField POINT_IN_TIME = new ParseField("pit");
|
||||
public static final ParseField RUNTIME_MAPPINGS_FIELD = new ParseField("runtime_mappings");
|
||||
public static final ParseField RETRIEVER = new ParseField("retriever");
|
||||
|
||||
private static final boolean RANK_SUPPORTED = Booleans.parseBoolean(System.getProperty("es.search.rank_supported"), true);
|
||||
|
||||
|
@ -1285,6 +1289,7 @@ public final class SearchSourceBuilder implements Writeable, ToXContentObject, R
|
|||
}
|
||||
List<KnnSearchBuilder.Builder> knnBuilders = new ArrayList<>();
|
||||
|
||||
RetrieverBuilder retrieverBuilder = null;
|
||||
SearchUsage searchUsage = new SearchUsage();
|
||||
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
|
||||
if (token == XContentParser.Token.FIELD_NAME) {
|
||||
|
@ -1353,7 +1358,15 @@ public final class SearchSourceBuilder implements Writeable, ToXContentObject, R
|
|||
);
|
||||
}
|
||||
} else if (token == XContentParser.Token.START_OBJECT) {
|
||||
if (QUERY_FIELD.match(currentFieldName, parser.getDeprecationHandler())) {
|
||||
if (RETRIEVER.match(currentFieldName, parser.getDeprecationHandler())) {
|
||||
if (clusterSupportsFeature.test(RetrieverBuilder.RETRIEVERS_SUPPORTED) == false) {
|
||||
throw new ParsingException(parser.getTokenLocation(), "Unknown key for a START_OBJECT in [retriever].");
|
||||
}
|
||||
retrieverBuilder = RetrieverBuilder.parseTopLevelRetrieverBuilder(
|
||||
parser,
|
||||
new RetrieverParserContext(searchUsage, clusterSupportsFeature)
|
||||
);
|
||||
} else if (QUERY_FIELD.match(currentFieldName, parser.getDeprecationHandler())) {
|
||||
if (subSearchSourceBuilders.isEmpty() == false) {
|
||||
throw new IllegalArgumentException(
|
||||
"cannot specify field [" + currentFieldName + "] and field [" + SUB_SEARCHES_FIELD.getPreferredName() + "]"
|
||||
|
@ -1611,6 +1624,38 @@ public final class SearchSourceBuilder implements Writeable, ToXContentObject, R
|
|||
|
||||
knnSearch = knnBuilders.stream().map(knnBuilder -> knnBuilder.build(size())).collect(Collectors.toList());
|
||||
|
||||
if (retrieverBuilder != null) {
|
||||
List<String> specified = new ArrayList<>();
|
||||
if (subSearchSourceBuilders.isEmpty() == false) {
|
||||
specified.add(QUERY_FIELD.getPreferredName());
|
||||
}
|
||||
if (knnSearch.isEmpty() == false) {
|
||||
specified.add(KNN_FIELD.getPreferredName());
|
||||
}
|
||||
if (searchAfterBuilder != null) {
|
||||
specified.add(SEARCH_AFTER.getPreferredName());
|
||||
}
|
||||
if (terminateAfter != DEFAULT_TERMINATE_AFTER) {
|
||||
specified.add(TERMINATE_AFTER_FIELD.getPreferredName());
|
||||
}
|
||||
if (sorts != null) {
|
||||
specified.add(SORT_FIELD.getPreferredName());
|
||||
}
|
||||
if (rescoreBuilders != null) {
|
||||
specified.add(RESCORE_FIELD.getPreferredName());
|
||||
}
|
||||
if (minScore != null) {
|
||||
specified.add(MIN_SCORE_FIELD.getPreferredName());
|
||||
}
|
||||
if (rankBuilder != null) {
|
||||
specified.add(RANK_FIELD.getPreferredName());
|
||||
}
|
||||
if (specified.isEmpty() == false) {
|
||||
throw new IllegalArgumentException("cannot specify [" + RETRIEVER.getPreferredName() + "] and " + specified);
|
||||
}
|
||||
retrieverBuilder.extractToSearchSourceBuilder(this, false);
|
||||
}
|
||||
|
||||
searchUsageConsumer.accept(searchUsage);
|
||||
return this;
|
||||
}
|
||||
|
|
|
@ -195,6 +195,11 @@ public class CollapseBuilder implements Writeable, ToXContentObject {
|
|||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return Strings.toString(this, true, true);
|
||||
}
|
||||
|
||||
public CollapseContext build(SearchExecutionContext searchExecutionContext) {
|
||||
MappedFieldType fieldType = searchExecutionContext.getFieldType(field);
|
||||
if (fieldType == null) {
|
||||
|
|
|
@ -0,0 +1,171 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0 and the Server Side Public License, v 1; you may not use this file except
|
||||
* in compliance with, at your election, the Elastic License 2.0 or the Server
|
||||
* Side Public License, v 1.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.search.retriever;
|
||||
|
||||
import org.elasticsearch.common.ParsingException;
|
||||
import org.elasticsearch.features.NodeFeature;
|
||||
import org.elasticsearch.search.builder.SearchSourceBuilder;
|
||||
import org.elasticsearch.search.vectors.KnnSearchBuilder;
|
||||
import org.elasticsearch.search.vectors.QueryVectorBuilder;
|
||||
import org.elasticsearch.xcontent.ConstructingObjectParser;
|
||||
import org.elasticsearch.xcontent.ParseField;
|
||||
import org.elasticsearch.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.xcontent.XContentParser;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg;
|
||||
import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg;
|
||||
|
||||
/**
|
||||
* A knn retriever is used to represent a knn search
|
||||
* with some elements to specify parameters for that knn search.
|
||||
*/
|
||||
public final class KnnRetrieverBuilder extends RetrieverBuilder {
|
||||
|
||||
public static final String NAME = "knn";
|
||||
public static final NodeFeature KNN_RETRIEVER_SUPPORTED = new NodeFeature("knn_retriever_supported");
|
||||
|
||||
public static final ParseField FIELD_FIELD = new ParseField("field");
|
||||
public static final ParseField K_FIELD = new ParseField("k");
|
||||
public static final ParseField NUM_CANDS_FIELD = new ParseField("num_candidates");
|
||||
public static final ParseField QUERY_VECTOR_FIELD = new ParseField("query_vector");
|
||||
public static final ParseField QUERY_VECTOR_BUILDER_FIELD = new ParseField("query_vector_builder");
|
||||
public static final ParseField VECTOR_SIMILARITY = new ParseField("similarity");
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public static final ConstructingObjectParser<KnnRetrieverBuilder, RetrieverParserContext> PARSER = new ConstructingObjectParser<>(
|
||||
"knn",
|
||||
args -> {
|
||||
List<Float> vector = (List<Float>) args[1];
|
||||
final float[] vectorArray;
|
||||
if (vector != null) {
|
||||
vectorArray = new float[vector.size()];
|
||||
for (int i = 0; i < vector.size(); i++) {
|
||||
vectorArray[i] = vector.get(i);
|
||||
}
|
||||
} else {
|
||||
vectorArray = null;
|
||||
}
|
||||
return new KnnRetrieverBuilder(
|
||||
(String) args[0],
|
||||
vectorArray,
|
||||
(QueryVectorBuilder) args[2],
|
||||
(int) args[3],
|
||||
(int) args[4],
|
||||
(Float) args[5]
|
||||
);
|
||||
}
|
||||
);
|
||||
|
||||
static {
|
||||
PARSER.declareString(constructorArg(), FIELD_FIELD);
|
||||
PARSER.declareFloatArray(optionalConstructorArg(), QUERY_VECTOR_FIELD);
|
||||
PARSER.declareNamedObject(
|
||||
optionalConstructorArg(),
|
||||
(p, c, n) -> p.namedObject(QueryVectorBuilder.class, n, c),
|
||||
QUERY_VECTOR_BUILDER_FIELD
|
||||
);
|
||||
PARSER.declareInt(constructorArg(), K_FIELD);
|
||||
PARSER.declareInt(constructorArg(), NUM_CANDS_FIELD);
|
||||
PARSER.declareFloat(optionalConstructorArg(), VECTOR_SIMILARITY);
|
||||
RetrieverBuilder.declareBaseParserFields(NAME, PARSER);
|
||||
}
|
||||
|
||||
public static KnnRetrieverBuilder fromXContent(XContentParser parser, RetrieverParserContext context) throws IOException {
|
||||
if (context.clusterSupportsFeature(KNN_RETRIEVER_SUPPORTED) == false) {
|
||||
throw new ParsingException(parser.getTokenLocation(), "unknown retriever [" + NAME + "]");
|
||||
}
|
||||
return PARSER.apply(parser, context);
|
||||
}
|
||||
|
||||
private final String field;
|
||||
private final float[] queryVector;
|
||||
private final QueryVectorBuilder queryVectorBuilder;
|
||||
private final int k;
|
||||
private final int numCands;
|
||||
private final Float similarity;
|
||||
|
||||
public KnnRetrieverBuilder(
|
||||
String field,
|
||||
float[] queryVector,
|
||||
QueryVectorBuilder queryVectorBuilder,
|
||||
int k,
|
||||
int numCands,
|
||||
Float similarity
|
||||
) {
|
||||
this.field = field;
|
||||
this.queryVector = queryVector;
|
||||
this.queryVectorBuilder = queryVectorBuilder;
|
||||
this.k = k;
|
||||
this.numCands = numCands;
|
||||
this.similarity = similarity;
|
||||
}
|
||||
|
||||
// ---- FOR TESTING XCONTENT PARSING ----
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
return NAME;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void extractToSearchSourceBuilder(SearchSourceBuilder searchSourceBuilder, boolean compoundUsed) {
|
||||
KnnSearchBuilder knnSearchBuilder = new KnnSearchBuilder(field, queryVector, queryVectorBuilder, k, numCands, similarity);
|
||||
if (preFilterQueryBuilders != null) {
|
||||
knnSearchBuilder.addFilterQueries(preFilterQueryBuilders);
|
||||
}
|
||||
List<KnnSearchBuilder> knnSearchBuilders = new ArrayList<>(searchSourceBuilder.knnSearch());
|
||||
knnSearchBuilders.add(knnSearchBuilder);
|
||||
searchSourceBuilder.knnSearch(knnSearchBuilders);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void doToXContent(XContentBuilder builder, Params params) throws IOException {
|
||||
builder.field(FIELD_FIELD.getPreferredName(), field);
|
||||
builder.field(K_FIELD.getPreferredName(), k);
|
||||
builder.field(NUM_CANDS_FIELD.getPreferredName(), numCands);
|
||||
|
||||
if (queryVector != null) {
|
||||
builder.field(QUERY_VECTOR_FIELD.getPreferredName(), queryVector);
|
||||
}
|
||||
|
||||
if (queryVectorBuilder != null) {
|
||||
builder.field(QUERY_VECTOR_BUILDER_FIELD.getPreferredName(), queryVectorBuilder);
|
||||
}
|
||||
|
||||
if (similarity != null) {
|
||||
builder.field(VECTOR_SIMILARITY.getPreferredName(), similarity);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean doEquals(Object o) {
|
||||
KnnRetrieverBuilder that = (KnnRetrieverBuilder) o;
|
||||
return k == that.k
|
||||
&& numCands == that.numCands
|
||||
&& Objects.equals(field, that.field)
|
||||
&& Arrays.equals(queryVector, that.queryVector)
|
||||
&& Objects.equals(queryVectorBuilder, that.queryVectorBuilder)
|
||||
&& Objects.equals(similarity, that.similarity);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int doHashCode() {
|
||||
int result = Objects.hash(field, queryVectorBuilder, k, numCands, similarity);
|
||||
result = 31 * result + Arrays.hashCode(queryVector);
|
||||
return result;
|
||||
}
|
||||
|
||||
// ---- END TESTING ----
|
||||
}
|
|
@ -0,0 +1,234 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0 and the Server Side Public License, v 1; you may not use this file except
|
||||
* in compliance with, at your election, the Elastic License 2.0 or the Server
|
||||
* Side Public License, v 1.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.search.retriever;
|
||||
|
||||
import org.elasticsearch.common.ParsingException;
|
||||
import org.elasticsearch.common.Strings;
|
||||
import org.elasticsearch.common.xcontent.SuggestingErrorOnUnknown;
|
||||
import org.elasticsearch.features.NodeFeature;
|
||||
import org.elasticsearch.index.query.AbstractQueryBuilder;
|
||||
import org.elasticsearch.index.query.QueryBuilder;
|
||||
import org.elasticsearch.search.builder.SearchSourceBuilder;
|
||||
import org.elasticsearch.xcontent.AbstractObjectParser;
|
||||
import org.elasticsearch.xcontent.FilterXContentParserWrapper;
|
||||
import org.elasticsearch.xcontent.NamedObjectNotFoundException;
|
||||
import org.elasticsearch.xcontent.ParseField;
|
||||
import org.elasticsearch.xcontent.ToXContent;
|
||||
import org.elasticsearch.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.xcontent.XContentLocation;
|
||||
import org.elasticsearch.xcontent.XContentParser;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* A retriever represents an API element that returns an ordered list of top
|
||||
* documents. These can be obtained from a query, from another retriever, etc.
|
||||
* Internally, a {@link RetrieverBuilder} is just a wrapper for other search
|
||||
* elements that are extracted into a {@link SearchSourceBuilder}. The advantage
|
||||
* retrievers have is in the API they appear as a tree-like structure enabling
|
||||
* easier reasoning about what a search does.
|
||||
*
|
||||
* This is the base class for all other retrievers. This class does not support
|
||||
* serialization and is expected to be fully extracted to a {@link SearchSourceBuilder}
|
||||
* prior to any transport calls.
|
||||
*/
|
||||
public abstract class RetrieverBuilder implements ToXContent {
|
||||
|
||||
public static final NodeFeature RETRIEVERS_SUPPORTED = new NodeFeature("retrievers_supported");
|
||||
|
||||
public static final ParseField PRE_FILTER_FIELD = new ParseField("filter");
|
||||
|
||||
protected static void declareBaseParserFields(
|
||||
String name,
|
||||
AbstractObjectParser<? extends RetrieverBuilder, RetrieverParserContext> parser
|
||||
) {
|
||||
parser.declareObjectArray((r, v) -> r.preFilterQueryBuilders = v, (p, c) -> {
|
||||
QueryBuilder preFilterQueryBuilder = AbstractQueryBuilder.parseTopLevelQuery(p, c::trackQueryUsage);
|
||||
c.trackSectionUsage(name + ":" + PRE_FILTER_FIELD.getPreferredName());
|
||||
return preFilterQueryBuilder;
|
||||
}, PRE_FILTER_FIELD);
|
||||
}
|
||||
|
||||
/**
|
||||
* This method parsers a top-level retriever within a search and tracks its own depth. Currently, the
|
||||
* maximum depth allowed is limited to 2 as a compound retriever cannot currently contain another
|
||||
* compound retriever.
|
||||
*/
|
||||
public static RetrieverBuilder parseTopLevelRetrieverBuilder(XContentParser parser, RetrieverParserContext context) throws IOException {
|
||||
parser = new FilterXContentParserWrapper(parser) {
|
||||
|
||||
int nestedDepth = 0;
|
||||
|
||||
@Override
|
||||
public <T> T namedObject(Class<T> categoryClass, String name, Object context) throws IOException {
|
||||
if (categoryClass.equals(RetrieverBuilder.class)) {
|
||||
nestedDepth++;
|
||||
|
||||
if (nestedDepth > 2) {
|
||||
throw new IllegalArgumentException(
|
||||
"the nested depth of the [" + name + "] retriever exceeds the maximum nested depth [2] for retrievers"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
T namedObject = getXContentRegistry().parseNamedObject(categoryClass, name, this, context);
|
||||
|
||||
if (categoryClass.equals(RetrieverBuilder.class)) {
|
||||
nestedDepth--;
|
||||
}
|
||||
|
||||
return namedObject;
|
||||
}
|
||||
};
|
||||
|
||||
return parseInnerRetrieverBuilder(parser, context);
|
||||
}
|
||||
|
||||
protected static RetrieverBuilder parseInnerRetrieverBuilder(XContentParser parser, RetrieverParserContext context) throws IOException {
|
||||
Objects.requireNonNull(context);
|
||||
|
||||
if (parser.currentToken() != XContentParser.Token.START_OBJECT && parser.nextToken() != XContentParser.Token.START_OBJECT) {
|
||||
throw new ParsingException(
|
||||
parser.getTokenLocation(),
|
||||
"retriever malformed, must start with [" + XContentParser.Token.START_OBJECT + "]"
|
||||
);
|
||||
}
|
||||
|
||||
if (parser.nextToken() == XContentParser.Token.END_OBJECT) {
|
||||
throw new ParsingException(parser.getTokenLocation(), "retriever malformed, empty clause found");
|
||||
}
|
||||
|
||||
if (parser.currentToken() != XContentParser.Token.FIELD_NAME) {
|
||||
throw new ParsingException(
|
||||
parser.getTokenLocation(),
|
||||
"retriever malformed, no field after [" + XContentParser.Token.START_OBJECT + "]"
|
||||
);
|
||||
}
|
||||
|
||||
String retrieverName = parser.currentName();
|
||||
|
||||
if (parser.nextToken() != XContentParser.Token.START_OBJECT) {
|
||||
throw new ParsingException(
|
||||
parser.getTokenLocation(),
|
||||
"[" + retrieverName + "] retriever malformed, no [" + XContentParser.Token.START_OBJECT + "] after retriever name"
|
||||
);
|
||||
}
|
||||
|
||||
RetrieverBuilder retrieverBuilder;
|
||||
|
||||
try {
|
||||
retrieverBuilder = parser.namedObject(RetrieverBuilder.class, retrieverName, context);
|
||||
} catch (NamedObjectNotFoundException nonfe) {
|
||||
String message = String.format(
|
||||
Locale.ROOT,
|
||||
"unknown retriever [%s]%s",
|
||||
retrieverName,
|
||||
SuggestingErrorOnUnknown.suggest(retrieverName, nonfe.getCandidates())
|
||||
);
|
||||
|
||||
throw new ParsingException(new XContentLocation(nonfe.getLineNumber(), nonfe.getColumnNumber()), message, nonfe);
|
||||
}
|
||||
|
||||
context.trackSectionUsage(retrieverName);
|
||||
|
||||
if (parser.currentToken() != XContentParser.Token.END_OBJECT) {
|
||||
throw new ParsingException(
|
||||
parser.getTokenLocation(),
|
||||
"["
|
||||
+ retrieverName
|
||||
+ "] malformed retriever, expected ["
|
||||
+ XContentParser.Token.END_OBJECT
|
||||
+ "] but found ["
|
||||
+ parser.currentToken()
|
||||
+ "]"
|
||||
);
|
||||
}
|
||||
|
||||
if (parser.nextToken() != XContentParser.Token.END_OBJECT) {
|
||||
throw new ParsingException(
|
||||
parser.getTokenLocation(),
|
||||
"["
|
||||
+ retrieverName
|
||||
+ "] malformed retriever, expected ["
|
||||
+ XContentParser.Token.END_OBJECT
|
||||
+ "] but found ["
|
||||
+ parser.currentToken()
|
||||
+ "]"
|
||||
);
|
||||
}
|
||||
|
||||
return retrieverBuilder;
|
||||
}
|
||||
|
||||
protected List<QueryBuilder> preFilterQueryBuilders = new ArrayList<>();
|
||||
|
||||
/**
|
||||
* Gets the filters for this retriever.
|
||||
*/
|
||||
public List<QueryBuilder> getPreFilterQueryBuilders() {
|
||||
return preFilterQueryBuilders;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called at the end of parsing on behalf of a {@link SearchSourceBuilder}.
|
||||
* Elements from retrievers are expected to be "extracted" into the {@link SearchSourceBuilder}.
|
||||
*/
|
||||
public abstract void extractToSearchSourceBuilder(SearchSourceBuilder searchSourceBuilder, boolean compoundUsed);
|
||||
|
||||
// ---- FOR TESTING XCONTENT PARSING ----
|
||||
|
||||
public abstract String getName();
|
||||
|
||||
@Override
|
||||
public final XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params params) throws IOException {
|
||||
builder.startObject();
|
||||
if (preFilterQueryBuilders.isEmpty() == false) {
|
||||
builder.field(PRE_FILTER_FIELD.getPreferredName(), preFilterQueryBuilders);
|
||||
}
|
||||
doToXContent(builder, params);
|
||||
builder.endObject();
|
||||
|
||||
return builder;
|
||||
}
|
||||
|
||||
protected abstract void doToXContent(XContentBuilder builder, ToXContent.Params params) throws IOException;
|
||||
|
||||
@Override
|
||||
public boolean isFragment() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (o == null || getClass() != o.getClass()) return false;
|
||||
RetrieverBuilder that = (RetrieverBuilder) o;
|
||||
return Objects.equals(preFilterQueryBuilders, that.preFilterQueryBuilders) && doEquals(o);
|
||||
}
|
||||
|
||||
protected abstract boolean doEquals(Object o);
|
||||
|
||||
@Override
|
||||
public final int hashCode() {
|
||||
return Objects.hash(getClass(), preFilterQueryBuilders, doHashCode());
|
||||
}
|
||||
|
||||
protected abstract int doHashCode();
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return Strings.toString(this, true, true);
|
||||
}
|
||||
|
||||
// ---- END FOR TESTING ----
|
||||
}
|
|
@ -0,0 +1,30 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0 and the Server Side Public License, v 1; you may not use this file except
|
||||
* in compliance with, at your election, the Elastic License 2.0 or the Server
|
||||
* Side Public License, v 1.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.search.retriever;
|
||||
|
||||
import org.elasticsearch.xcontent.XContentParser;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Defines a retriever parser that is able to parse {@link RetrieverBuilder}s
|
||||
* from {@link org.elasticsearch.xcontent.XContent}.
|
||||
*/
|
||||
@FunctionalInterface
|
||||
public interface RetrieverParser<RB extends RetrieverBuilder> {
|
||||
|
||||
/**
|
||||
* Creates a new {@link RetrieverBuilder} from the retriever held by the
|
||||
* {@link XContentParser}. The state on the parser contained in this context
|
||||
* will be changed as a side effect of this method call. The
|
||||
* {@link RetrieverParserContext} tracks usage of retriever features and
|
||||
* queries when available.
|
||||
*/
|
||||
RB fromXContent(XContentParser parser, RetrieverParserContext context) throws IOException;
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0 and the Server Side Public License, v 1; you may not use this file except
|
||||
* in compliance with, at your election, the Elastic License 2.0 or the Server
|
||||
* Side Public License, v 1.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.search.retriever;
|
||||
|
||||
import org.elasticsearch.features.NodeFeature;
|
||||
import org.elasticsearch.usage.SearchUsage;
|
||||
|
||||
import java.util.Objects;
|
||||
import java.util.function.Predicate;
|
||||
|
||||
public class RetrieverParserContext {
|
||||
|
||||
protected final SearchUsage searchUsage;
|
||||
protected final Predicate<NodeFeature> clusterSupportsFeature;
|
||||
|
||||
public RetrieverParserContext(SearchUsage searchUsage, Predicate<NodeFeature> clusterSupportsFeature) {
|
||||
this.searchUsage = Objects.requireNonNull(searchUsage);
|
||||
this.clusterSupportsFeature = clusterSupportsFeature;
|
||||
}
|
||||
|
||||
public void trackSectionUsage(String section) {
|
||||
searchUsage.trackSectionUsage(section);
|
||||
}
|
||||
|
||||
public void trackQueryUsage(String query) {
|
||||
searchUsage.trackQueryUsage(query);
|
||||
}
|
||||
|
||||
public void trackRescorerUsage(String name) {
|
||||
searchUsage.trackRescorerUsage(name);
|
||||
}
|
||||
|
||||
public boolean clusterSupportsFeature(NodeFeature nodeFeature) {
|
||||
return clusterSupportsFeature != null && clusterSupportsFeature.test(nodeFeature);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,30 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0 and the Server Side Public License, v 1; you may not use this file except
|
||||
* in compliance with, at your election, the Elastic License 2.0 or the Server
|
||||
* Side Public License, v 1.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.search.retriever;
|
||||
|
||||
import org.elasticsearch.features.FeatureSpecification;
|
||||
import org.elasticsearch.features.NodeFeature;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Each retriever is given its own {@link NodeFeature} so new
|
||||
* retrievers can be added individually with additional functionality.
|
||||
*/
|
||||
public class RetrieversFeatures implements FeatureSpecification {
|
||||
|
||||
@Override
|
||||
public Set<NodeFeature> getFeatures() {
|
||||
return Set.of(
|
||||
RetrieverBuilder.RETRIEVERS_SUPPORTED,
|
||||
StandardRetrieverBuilder.STANDARD_RETRIEVER_SUPPORTED,
|
||||
KnnRetrieverBuilder.KNN_RETRIEVER_SUPPORTED
|
||||
);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,229 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0 and the Server Side Public License, v 1; you may not use this file except
|
||||
* in compliance with, at your election, the Elastic License 2.0 or the Server
|
||||
* Side Public License, v 1.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.search.retriever;
|
||||
|
||||
import org.elasticsearch.common.ParsingException;
|
||||
import org.elasticsearch.features.NodeFeature;
|
||||
import org.elasticsearch.index.query.AbstractQueryBuilder;
|
||||
import org.elasticsearch.index.query.BoolQueryBuilder;
|
||||
import org.elasticsearch.index.query.QueryBuilder;
|
||||
import org.elasticsearch.search.builder.SearchSourceBuilder;
|
||||
import org.elasticsearch.search.builder.SubSearchSourceBuilder;
|
||||
import org.elasticsearch.search.collapse.CollapseBuilder;
|
||||
import org.elasticsearch.search.internal.SearchContext;
|
||||
import org.elasticsearch.search.searchafter.SearchAfterBuilder;
|
||||
import org.elasticsearch.search.sort.SortBuilder;
|
||||
import org.elasticsearch.xcontent.ObjectParser;
|
||||
import org.elasticsearch.xcontent.ParseField;
|
||||
import org.elasticsearch.xcontent.ToXContent;
|
||||
import org.elasticsearch.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.xcontent.XContentParser;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* A standard retriever is used to represent anything that is a query along
|
||||
* with some elements to specify parameters for that query.
|
||||
*/
|
||||
public final class StandardRetrieverBuilder extends RetrieverBuilder implements ToXContent {
|
||||
|
||||
public static final String NAME = "standard";
|
||||
public static final NodeFeature STANDARD_RETRIEVER_SUPPORTED = new NodeFeature("standard_retriever_supported");
|
||||
|
||||
public static final ParseField QUERY_FIELD = new ParseField("query");
|
||||
public static final ParseField SEARCH_AFTER_FIELD = new ParseField("search_after");
|
||||
public static final ParseField TERMINATE_AFTER_FIELD = new ParseField("terminate_after");
|
||||
public static final ParseField SORT_FIELD = new ParseField("sort");
|
||||
public static final ParseField MIN_SCORE_FIELD = new ParseField("min_score");
|
||||
public static final ParseField COLLAPSE_FIELD = new ParseField("collapse");
|
||||
|
||||
public static final ObjectParser<StandardRetrieverBuilder, RetrieverParserContext> PARSER = new ObjectParser<>(
|
||||
NAME,
|
||||
StandardRetrieverBuilder::new
|
||||
);
|
||||
|
||||
static {
|
||||
PARSER.declareObject((r, v) -> r.queryBuilder = v, (p, c) -> {
|
||||
QueryBuilder queryBuilder = AbstractQueryBuilder.parseTopLevelQuery(p, c::trackQueryUsage);
|
||||
c.trackSectionUsage(NAME + ":" + QUERY_FIELD.getPreferredName());
|
||||
return queryBuilder;
|
||||
}, QUERY_FIELD);
|
||||
|
||||
PARSER.declareField((r, v) -> r.searchAfterBuilder = v, (p, c) -> {
|
||||
SearchAfterBuilder searchAfterBuilder = SearchAfterBuilder.fromXContent(p);
|
||||
c.trackSectionUsage(NAME + ":" + SEARCH_AFTER_FIELD.getPreferredName());
|
||||
return searchAfterBuilder;
|
||||
}, SEARCH_AFTER_FIELD, ObjectParser.ValueType.OBJECT_ARRAY);
|
||||
|
||||
PARSER.declareField((r, v) -> r.terminateAfter = v, (p, c) -> {
|
||||
int terminateAfter = p.intValue();
|
||||
c.trackSectionUsage(NAME + ":" + TERMINATE_AFTER_FIELD.getPreferredName());
|
||||
return terminateAfter;
|
||||
}, TERMINATE_AFTER_FIELD, ObjectParser.ValueType.INT);
|
||||
|
||||
PARSER.declareField((r, v) -> r.sortBuilders = v, (p, c) -> {
|
||||
List<SortBuilder<?>> sortBuilders = SortBuilder.fromXContent(p);
|
||||
c.trackSectionUsage(NAME + ":" + SORT_FIELD.getPreferredName());
|
||||
return sortBuilders;
|
||||
}, SORT_FIELD, ObjectParser.ValueType.OBJECT_ARRAY);
|
||||
|
||||
PARSER.declareField((r, v) -> r.minScore = v, (p, c) -> {
|
||||
float minScore = p.floatValue();
|
||||
c.trackSectionUsage(NAME + ":" + MIN_SCORE_FIELD.getPreferredName());
|
||||
return minScore;
|
||||
}, MIN_SCORE_FIELD, ObjectParser.ValueType.FLOAT);
|
||||
|
||||
PARSER.declareField((r, v) -> r.collapseBuilder = v, (p, c) -> {
|
||||
CollapseBuilder collapseBuilder = CollapseBuilder.fromXContent(p);
|
||||
if (collapseBuilder.getField() != null) {
|
||||
c.trackSectionUsage(COLLAPSE_FIELD.getPreferredName());
|
||||
}
|
||||
return collapseBuilder;
|
||||
}, COLLAPSE_FIELD, ObjectParser.ValueType.OBJECT);
|
||||
|
||||
RetrieverBuilder.declareBaseParserFields(NAME, PARSER);
|
||||
}
|
||||
|
||||
public static StandardRetrieverBuilder fromXContent(XContentParser parser, RetrieverParserContext context) throws IOException {
|
||||
if (context.clusterSupportsFeature(STANDARD_RETRIEVER_SUPPORTED) == false) {
|
||||
throw new ParsingException(parser.getTokenLocation(), "unknown retriever [" + NAME + "]");
|
||||
}
|
||||
return PARSER.apply(parser, context);
|
||||
}
|
||||
|
||||
QueryBuilder queryBuilder;
|
||||
SearchAfterBuilder searchAfterBuilder;
|
||||
int terminateAfter = SearchContext.DEFAULT_TERMINATE_AFTER;
|
||||
List<SortBuilder<?>> sortBuilders;
|
||||
Float minScore;
|
||||
CollapseBuilder collapseBuilder;
|
||||
|
||||
@Override
|
||||
public void extractToSearchSourceBuilder(SearchSourceBuilder searchSourceBuilder, boolean compoundUsed) {
|
||||
if (preFilterQueryBuilders.isEmpty() == false) {
|
||||
BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder();
|
||||
|
||||
for (QueryBuilder preFilterQueryBuilder : preFilterQueryBuilders) {
|
||||
boolQueryBuilder.filter(preFilterQueryBuilder);
|
||||
}
|
||||
|
||||
if (queryBuilder != null) {
|
||||
boolQueryBuilder.must(queryBuilder);
|
||||
}
|
||||
|
||||
searchSourceBuilder.subSearches().add(new SubSearchSourceBuilder(boolQueryBuilder));
|
||||
} else if (queryBuilder != null) {
|
||||
searchSourceBuilder.subSearches().add(new SubSearchSourceBuilder(queryBuilder));
|
||||
}
|
||||
|
||||
if (searchAfterBuilder != null) {
|
||||
if (compoundUsed) {
|
||||
throw new IllegalArgumentException(
|
||||
"[" + SEARCH_AFTER_FIELD.getPreferredName() + "] cannot be used in children of compound retrievers"
|
||||
);
|
||||
}
|
||||
|
||||
searchSourceBuilder.searchAfter(searchAfterBuilder.getSortValues());
|
||||
}
|
||||
|
||||
if (terminateAfter != SearchContext.DEFAULT_TERMINATE_AFTER) {
|
||||
if (compoundUsed) {
|
||||
throw new IllegalArgumentException(
|
||||
"[" + TERMINATE_AFTER_FIELD.getPreferredName() + "] cannot be used in children of compound retrievers"
|
||||
);
|
||||
}
|
||||
|
||||
searchSourceBuilder.terminateAfter(terminateAfter);
|
||||
}
|
||||
|
||||
if (sortBuilders != null) {
|
||||
if (compoundUsed) {
|
||||
throw new IllegalArgumentException(
|
||||
"[" + SORT_FIELD.getPreferredName() + "] cannot be used in children of compound retrievers"
|
||||
);
|
||||
}
|
||||
|
||||
searchSourceBuilder.sort(sortBuilders);
|
||||
}
|
||||
|
||||
if (minScore != null) {
|
||||
if (compoundUsed) {
|
||||
throw new IllegalArgumentException(
|
||||
"[" + MIN_SCORE_FIELD.getPreferredName() + "] cannot be used in children of compound retrievers"
|
||||
);
|
||||
}
|
||||
|
||||
searchSourceBuilder.minScore(minScore);
|
||||
}
|
||||
|
||||
if (collapseBuilder != null) {
|
||||
if (compoundUsed) {
|
||||
throw new IllegalArgumentException(
|
||||
"[" + COLLAPSE_FIELD.getPreferredName() + "] cannot be used in children of compound retrievers"
|
||||
);
|
||||
}
|
||||
|
||||
searchSourceBuilder.collapse(collapseBuilder);
|
||||
}
|
||||
}
|
||||
|
||||
// ---- FOR TESTING XCONTENT PARSING ----
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
return NAME;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void doToXContent(XContentBuilder builder, ToXContent.Params params) throws IOException {
|
||||
if (queryBuilder != null) {
|
||||
builder.field(QUERY_FIELD.getPreferredName(), queryBuilder);
|
||||
}
|
||||
|
||||
if (searchAfterBuilder != null) {
|
||||
searchAfterBuilder.innerToXContent(builder);
|
||||
}
|
||||
|
||||
if (terminateAfter != SearchContext.DEFAULT_TERMINATE_AFTER) {
|
||||
builder.field(TERMINATE_AFTER_FIELD.getPreferredName(), terminateAfter);
|
||||
}
|
||||
|
||||
if (sortBuilders != null) {
|
||||
builder.field(SORT_FIELD.getPreferredName(), sortBuilders);
|
||||
}
|
||||
|
||||
if (minScore != null) {
|
||||
builder.field(MIN_SCORE_FIELD.getPreferredName(), minScore);
|
||||
}
|
||||
|
||||
if (collapseBuilder != null) {
|
||||
builder.field(COLLAPSE_FIELD.getPreferredName(), collapseBuilder);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean doEquals(Object o) {
|
||||
StandardRetrieverBuilder that = (StandardRetrieverBuilder) o;
|
||||
return terminateAfter == that.terminateAfter
|
||||
&& Objects.equals(queryBuilder, that.queryBuilder)
|
||||
&& Objects.equals(searchAfterBuilder, that.searchAfterBuilder)
|
||||
&& Objects.equals(sortBuilders, that.sortBuilders)
|
||||
&& Objects.equals(minScore, that.minScore)
|
||||
&& Objects.equals(collapseBuilder, that.collapseBuilder);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int doHashCode() {
|
||||
return Objects.hash(queryBuilder, searchAfterBuilder, terminateAfter, sortBuilders, minScore, collapseBuilder);
|
||||
}
|
||||
|
||||
// ---- END FOR TESTING ----
|
||||
}
|
|
@ -211,7 +211,7 @@ public class SearchAfterBuilder implements ToXContentObject, Writeable {
|
|||
return builder;
|
||||
}
|
||||
|
||||
void innerToXContent(XContentBuilder builder) throws IOException {
|
||||
public void innerToXContent(XContentBuilder builder) throws IOException {
|
||||
builder.array(SEARCH_AFTER.getPreferredName(), sortValues);
|
||||
}
|
||||
|
||||
|
@ -277,7 +277,8 @@ public class SearchAfterBuilder implements ToXContentObject, Writeable {
|
|||
if ((other instanceof SearchAfterBuilder) == false) {
|
||||
return false;
|
||||
}
|
||||
return Arrays.equals(sortValues, ((SearchAfterBuilder) other).sortValues);
|
||||
boolean value = Arrays.equals(sortValues, ((SearchAfterBuilder) other).sortValues);
|
||||
return value;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -12,3 +12,4 @@ org.elasticsearch.cluster.service.TransportFeatures
|
|||
org.elasticsearch.cluster.metadata.MetadataFeatures
|
||||
org.elasticsearch.rest.RestFeatures
|
||||
org.elasticsearch.indices.IndicesFeatures
|
||||
org.elasticsearch.search.retriever.RetrieversFeatures
|
||||
|
|
|
@ -0,0 +1,82 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0 and the Server Side Public License, v 1; you may not use this file except
|
||||
* in compliance with, at your election, the Elastic License 2.0 or the Server
|
||||
* Side Public License, v 1.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.search.retriever;
|
||||
|
||||
import org.elasticsearch.action.search.SearchRequest;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.query.QueryBuilder;
|
||||
import org.elasticsearch.index.query.RandomQueryBuilder;
|
||||
import org.elasticsearch.search.SearchModule;
|
||||
import org.elasticsearch.test.AbstractXContentTestCase;
|
||||
import org.elasticsearch.usage.SearchUsage;
|
||||
import org.elasticsearch.xcontent.NamedXContentRegistry;
|
||||
import org.elasticsearch.xcontent.XContentParser;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import static org.elasticsearch.search.vectors.KnnSearchBuilderTests.randomVector;
|
||||
|
||||
public class KnnRetrieverBuilderParsingTests extends AbstractXContentTestCase<KnnRetrieverBuilder> {
|
||||
|
||||
/**
|
||||
* Creates a random {@link KnnRetrieverBuilder}. The created instance
|
||||
* is not guaranteed to pass {@link SearchRequest} validation. This is purely
|
||||
* for x-content testing.
|
||||
*/
|
||||
public static KnnRetrieverBuilder createRandomKnnRetrieverBuilder() {
|
||||
String field = randomAlphaOfLength(6);
|
||||
int dim = randomIntBetween(2, 30);
|
||||
float[] vector = randomBoolean() ? null : randomVector(dim);
|
||||
int k = randomIntBetween(1, 100);
|
||||
int numCands = randomIntBetween(k + 20, 1000);
|
||||
Float similarity = randomBoolean() ? null : randomFloat();
|
||||
|
||||
KnnRetrieverBuilder knnRetrieverBuilder = new KnnRetrieverBuilder(field, vector, null, k, numCands, similarity);
|
||||
|
||||
List<QueryBuilder> preFilterQueryBuilders = new ArrayList<>();
|
||||
|
||||
if (randomBoolean()) {
|
||||
for (int i = 0; i < randomIntBetween(1, 3); ++i) {
|
||||
preFilterQueryBuilders.add(RandomQueryBuilder.createQuery(random()));
|
||||
}
|
||||
}
|
||||
|
||||
knnRetrieverBuilder.preFilterQueryBuilders.addAll(preFilterQueryBuilders);
|
||||
|
||||
return knnRetrieverBuilder;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected KnnRetrieverBuilder createTestInstance() {
|
||||
return createRandomKnnRetrieverBuilder();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected KnnRetrieverBuilder doParseInstance(XContentParser parser) throws IOException {
|
||||
return KnnRetrieverBuilder.fromXContent(
|
||||
parser,
|
||||
new RetrieverParserContext(
|
||||
new SearchUsage(),
|
||||
nf -> nf == RetrieverBuilder.RETRIEVERS_SUPPORTED || nf == KnnRetrieverBuilder.KNN_RETRIEVER_SUPPORTED
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean supportsUnknownFields() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected NamedXContentRegistry xContentRegistry() {
|
||||
return new NamedXContentRegistry(new SearchModule(Settings.EMPTY, List.of()).getNamedXContents());
|
||||
}
|
||||
}
|
|
@ -0,0 +1,102 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0 and the Server Side Public License, v 1; you may not use this file except
|
||||
* in compliance with, at your election, the Elastic License 2.0 or the Server
|
||||
* Side Public License, v 1.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.search.retriever;
|
||||
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.search.SearchModule;
|
||||
import org.elasticsearch.search.builder.SearchSourceBuilder;
|
||||
import org.elasticsearch.test.ESTestCase;
|
||||
import org.elasticsearch.xcontent.NamedXContentRegistry;
|
||||
import org.elasticsearch.xcontent.XContentParser;
|
||||
import org.elasticsearch.xcontent.json.JsonXContent;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Tests exceptions related to usage of restricted global values with a retriever.
|
||||
*/
|
||||
public class RetrieverBuilderErrorTests extends ESTestCase {
|
||||
|
||||
public void testRetrieverExtractionErrors() throws IOException {
|
||||
try (
|
||||
XContentParser parser = createParser(
|
||||
JsonXContent.jsonXContent,
|
||||
"{\"query\": {\"match_all\": {}}, \"retriever\":{\"standard\":{}}}"
|
||||
)
|
||||
) {
|
||||
SearchSourceBuilder ssb = new SearchSourceBuilder();
|
||||
IllegalArgumentException iae = expectThrows(IllegalArgumentException.class, () -> ssb.parseXContent(parser, true, nf -> true));
|
||||
assertEquals("cannot specify [retriever] and [query]", iae.getMessage());
|
||||
}
|
||||
|
||||
try (
|
||||
XContentParser parser = createParser(
|
||||
JsonXContent.jsonXContent,
|
||||
"{\"knn\":{\"field\": \"test\", \"k\": 2, \"num_candidates\": 5,"
|
||||
+ " \"query_vector\": [1, 2, 3]}, \"retriever\":{\"standard\":{}}}"
|
||||
)
|
||||
) {
|
||||
SearchSourceBuilder ssb = new SearchSourceBuilder();
|
||||
IllegalArgumentException iae = expectThrows(IllegalArgumentException.class, () -> ssb.parseXContent(parser, true, nf -> true));
|
||||
assertEquals("cannot specify [retriever] and [knn]", iae.getMessage());
|
||||
}
|
||||
|
||||
try (XContentParser parser = createParser(JsonXContent.jsonXContent, "{\"search_after\": [1], \"retriever\":{\"standard\":{}}}")) {
|
||||
SearchSourceBuilder ssb = new SearchSourceBuilder();
|
||||
IllegalArgumentException iae = expectThrows(IllegalArgumentException.class, () -> ssb.parseXContent(parser, true, nf -> true));
|
||||
assertEquals("cannot specify [retriever] and [search_after]", iae.getMessage());
|
||||
}
|
||||
|
||||
try (XContentParser parser = createParser(JsonXContent.jsonXContent, "{\"terminate_after\": 1, \"retriever\":{\"standard\":{}}}")) {
|
||||
SearchSourceBuilder ssb = new SearchSourceBuilder();
|
||||
IllegalArgumentException iae = expectThrows(IllegalArgumentException.class, () -> ssb.parseXContent(parser, true, nf -> true));
|
||||
assertEquals("cannot specify [retriever] and [terminate_after]", iae.getMessage());
|
||||
}
|
||||
|
||||
try (XContentParser parser = createParser(JsonXContent.jsonXContent, "{\"sort\": [\"field\"], \"retriever\":{\"standard\":{}}}")) {
|
||||
SearchSourceBuilder ssb = new SearchSourceBuilder();
|
||||
IllegalArgumentException iae = expectThrows(IllegalArgumentException.class, () -> ssb.parseXContent(parser, true, nf -> true));
|
||||
assertEquals("cannot specify [retriever] and [sort]", iae.getMessage());
|
||||
}
|
||||
|
||||
try (
|
||||
XContentParser parser = createParser(
|
||||
JsonXContent.jsonXContent,
|
||||
"{\"rescore\": {\"query\": {\"rescore_query\": {\"match_all\": {}}}}, \"retriever\":{\"standard\":{}}}"
|
||||
)
|
||||
) {
|
||||
SearchSourceBuilder ssb = new SearchSourceBuilder();
|
||||
IllegalArgumentException iae = expectThrows(IllegalArgumentException.class, () -> ssb.parseXContent(parser, true, nf -> true));
|
||||
assertEquals("cannot specify [retriever] and [rescore]", iae.getMessage());
|
||||
}
|
||||
|
||||
try (XContentParser parser = createParser(JsonXContent.jsonXContent, "{\"min_score\": 2, \"retriever\":{\"standard\":{}}}")) {
|
||||
SearchSourceBuilder ssb = new SearchSourceBuilder();
|
||||
IllegalArgumentException iae = expectThrows(IllegalArgumentException.class, () -> ssb.parseXContent(parser, true, nf -> true));
|
||||
assertEquals("cannot specify [retriever] and [min_score]", iae.getMessage());
|
||||
}
|
||||
|
||||
try (
|
||||
XContentParser parser = createParser(
|
||||
JsonXContent.jsonXContent,
|
||||
"" + "{\"min_score\": 2, \"query\": {\"match_all\": {}}, \"retriever\":{\"standard\":{}}, \"terminate_after\": 1}"
|
||||
)
|
||||
) {
|
||||
SearchSourceBuilder ssb = new SearchSourceBuilder();
|
||||
IllegalArgumentException iae = expectThrows(IllegalArgumentException.class, () -> ssb.parseXContent(parser, true, nf -> true));
|
||||
assertEquals("cannot specify [retriever] and [query, terminate_after, min_score]", iae.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected NamedXContentRegistry xContentRegistry() {
|
||||
return new NamedXContentRegistry(new SearchModule(Settings.EMPTY, List.of()).getNamedXContents());
|
||||
}
|
||||
}
|
|
@ -0,0 +1,80 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0 and the Server Side Public License, v 1; you may not use this file except
|
||||
* in compliance with, at your election, the Elastic License 2.0 or the Server
|
||||
* Side Public License, v 1.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.search.retriever;
|
||||
|
||||
import org.elasticsearch.common.ParsingException;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.features.NodeFeature;
|
||||
import org.elasticsearch.search.SearchModule;
|
||||
import org.elasticsearch.search.builder.SearchSourceBuilder;
|
||||
import org.elasticsearch.test.ESTestCase;
|
||||
import org.elasticsearch.xcontent.NamedXContentRegistry;
|
||||
import org.elasticsearch.xcontent.XContentParser;
|
||||
import org.elasticsearch.xcontent.json.JsonXContent;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
/** Tests retrievers validate on their own {@link NodeFeature} */
|
||||
public class RetrieverBuilderVersionTests extends ESTestCase {
|
||||
|
||||
public void testRetrieverVersions() throws IOException {
|
||||
try (XContentParser parser = createParser(JsonXContent.jsonXContent, "{\"retriever\":{\"standard\":{}}}")) {
|
||||
SearchSourceBuilder ssb = new SearchSourceBuilder();
|
||||
ParsingException iae = expectThrows(ParsingException.class, () -> ssb.parseXContent(parser, true, nf -> false));
|
||||
assertEquals("Unknown key for a START_OBJECT in [retriever].", iae.getMessage());
|
||||
}
|
||||
|
||||
try (XContentParser parser = createParser(JsonXContent.jsonXContent, "{\"retriever\":{\"standard\":{}}}")) {
|
||||
SearchSourceBuilder ssb = new SearchSourceBuilder();
|
||||
ParsingException iae = expectThrows(
|
||||
ParsingException.class,
|
||||
() -> ssb.parseXContent(parser, true, nf -> nf == RetrieverBuilder.RETRIEVERS_SUPPORTED)
|
||||
);
|
||||
assertEquals("unknown retriever [standard]", iae.getMessage());
|
||||
}
|
||||
|
||||
try (XContentParser parser = createParser(JsonXContent.jsonXContent, "{\"retriever\":{\"standard\":{}}}")) {
|
||||
SearchSourceBuilder ssb = new SearchSourceBuilder();
|
||||
ssb.parseXContent(
|
||||
parser,
|
||||
true,
|
||||
nf -> nf == RetrieverBuilder.RETRIEVERS_SUPPORTED || nf == StandardRetrieverBuilder.STANDARD_RETRIEVER_SUPPORTED
|
||||
);
|
||||
}
|
||||
|
||||
try (XContentParser parser = createParser(JsonXContent.jsonXContent, "{\"retriever\":{\"knn\":{}}}")) {
|
||||
SearchSourceBuilder ssb = new SearchSourceBuilder();
|
||||
ParsingException iae = expectThrows(
|
||||
ParsingException.class,
|
||||
() -> ssb.parseXContent(parser, true, nf -> nf == RetrieverBuilder.RETRIEVERS_SUPPORTED)
|
||||
);
|
||||
assertEquals("unknown retriever [knn]", iae.getMessage());
|
||||
}
|
||||
|
||||
try (
|
||||
XContentParser parser = createParser(
|
||||
JsonXContent.jsonXContent,
|
||||
"{\"retriever\":{\"knn\":{\"field\": \"test\", \"k\": 2, \"num_candidates\": 5, \"query_vector\": [1, 2, 3]}}}"
|
||||
)
|
||||
) {
|
||||
SearchSourceBuilder ssb = new SearchSourceBuilder();
|
||||
ssb.parseXContent(
|
||||
parser,
|
||||
true,
|
||||
nf -> nf == RetrieverBuilder.RETRIEVERS_SUPPORTED || nf == KnnRetrieverBuilder.KNN_RETRIEVER_SUPPORTED
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected NamedXContentRegistry xContentRegistry() {
|
||||
return new NamedXContentRegistry(new SearchModule(Settings.EMPTY, List.of()).getNamedXContents());
|
||||
}
|
||||
}
|
|
@ -0,0 +1,112 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0 and the Server Side Public License, v 1; you may not use this file except
|
||||
* in compliance with, at your election, the Elastic License 2.0 or the Server
|
||||
* Side Public License, v 1.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.search.retriever;
|
||||
|
||||
import org.elasticsearch.action.search.SearchRequest;
|
||||
import org.elasticsearch.common.bytes.BytesReference;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.query.RandomQueryBuilder;
|
||||
import org.elasticsearch.search.SearchModule;
|
||||
import org.elasticsearch.search.collapse.CollapseBuilderTests;
|
||||
import org.elasticsearch.search.searchafter.SearchAfterBuilderTests;
|
||||
import org.elasticsearch.search.sort.SortBuilderTests;
|
||||
import org.elasticsearch.test.AbstractXContentTestCase;
|
||||
import org.elasticsearch.usage.SearchUsage;
|
||||
import org.elasticsearch.xcontent.NamedXContentRegistry;
|
||||
import org.elasticsearch.xcontent.XContent;
|
||||
import org.elasticsearch.xcontent.XContentParser;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.UncheckedIOException;
|
||||
import java.util.List;
|
||||
import java.util.function.BiFunction;
|
||||
|
||||
public class StandardRetrieverBuilderParsingTests extends AbstractXContentTestCase<StandardRetrieverBuilder> {
|
||||
|
||||
/**
|
||||
* Creates a random {@link StandardRetrieverBuilder}. The created instance
|
||||
* is not guaranteed to pass {@link SearchRequest} validation. This is purely
|
||||
* for x-content testing.
|
||||
*/
|
||||
public static StandardRetrieverBuilder createRandomStandardRetrieverBuilder(
|
||||
BiFunction<XContent, BytesReference, XContentParser> createParser
|
||||
) {
|
||||
try {
|
||||
StandardRetrieverBuilder standardRetrieverBuilder = new StandardRetrieverBuilder();
|
||||
|
||||
if (randomBoolean()) {
|
||||
for (int i = 0; i < randomIntBetween(1, 3); ++i) {
|
||||
standardRetrieverBuilder.getPreFilterQueryBuilders().add(RandomQueryBuilder.createQuery(random()));
|
||||
}
|
||||
}
|
||||
|
||||
if (randomBoolean()) {
|
||||
standardRetrieverBuilder.queryBuilder = RandomQueryBuilder.createQuery(random());
|
||||
}
|
||||
|
||||
if (randomBoolean()) {
|
||||
standardRetrieverBuilder.searchAfterBuilder = SearchAfterBuilderTests.randomJsonSearchFromBuilder(createParser);
|
||||
}
|
||||
|
||||
if (randomBoolean()) {
|
||||
standardRetrieverBuilder.terminateAfter = randomNonNegativeInt();
|
||||
}
|
||||
|
||||
if (randomBoolean()) {
|
||||
standardRetrieverBuilder.sortBuilders = SortBuilderTests.randomSortBuilderList();
|
||||
}
|
||||
|
||||
if (randomBoolean()) {
|
||||
standardRetrieverBuilder.collapseBuilder = CollapseBuilderTests.randomCollapseBuilder(randomBoolean());
|
||||
}
|
||||
|
||||
return standardRetrieverBuilder;
|
||||
} catch (IOException ioe) {
|
||||
throw new UncheckedIOException(ioe);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected StandardRetrieverBuilder createTestInstance() {
|
||||
return createRandomStandardRetrieverBuilder((xContent, data) -> {
|
||||
try {
|
||||
return createParser(xContent, data);
|
||||
} catch (IOException ioe) {
|
||||
throw new UncheckedIOException(ioe);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@Override
|
||||
protected StandardRetrieverBuilder doParseInstance(XContentParser parser) throws IOException {
|
||||
return StandardRetrieverBuilder.fromXContent(
|
||||
parser,
|
||||
new RetrieverParserContext(
|
||||
new SearchUsage(),
|
||||
nf -> nf == RetrieverBuilder.RETRIEVERS_SUPPORTED || nf == StandardRetrieverBuilder.STANDARD_RETRIEVER_SUPPORTED
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean supportsUnknownFields() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String[] getShuffleFieldsExceptions() {
|
||||
// disable xcontent shuffling on the highlight builder
|
||||
return new String[] { "fields" };
|
||||
}
|
||||
|
||||
@Override
|
||||
protected NamedXContentRegistry xContentRegistry() {
|
||||
return new NamedXContentRegistry(new SearchModule(Settings.EMPTY, List.of()).getNamedXContents());
|
||||
}
|
||||
}
|
|
@ -29,6 +29,7 @@ import org.elasticsearch.search.sort.BucketedSort;
|
|||
import org.elasticsearch.search.sort.SortAndFormats;
|
||||
import org.elasticsearch.search.sort.SortOrder;
|
||||
import org.elasticsearch.test.ESTestCase;
|
||||
import org.elasticsearch.xcontent.XContent;
|
||||
import org.elasticsearch.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.xcontent.XContentFactory;
|
||||
import org.elasticsearch.xcontent.XContentParser;
|
||||
|
@ -36,8 +37,10 @@ import org.elasticsearch.xcontent.XContentType;
|
|||
import org.elasticsearch.xcontent.json.JsonXContent;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.UncheckedIOException;
|
||||
import java.math.BigDecimal;
|
||||
import java.util.Collections;
|
||||
import java.util.function.BiFunction;
|
||||
|
||||
import static org.elasticsearch.search.searchafter.SearchAfterBuilder.extractSortType;
|
||||
import static org.elasticsearch.test.EqualsHashCodeTestUtils.checkEqualsAndHashCode;
|
||||
|
@ -47,7 +50,10 @@ import static org.hamcrest.Matchers.equalTo;
|
|||
public class SearchAfterBuilderTests extends ESTestCase {
|
||||
private static final int NUMBER_OF_TESTBUILDERS = 20;
|
||||
|
||||
private static SearchAfterBuilder randomSearchAfterBuilder() throws IOException {
|
||||
/**
|
||||
* Generates a random {@link SearchAfterBuilder}.
|
||||
*/
|
||||
public static SearchAfterBuilder randomSearchAfterBuilder() throws IOException {
|
||||
int numSearchFrom = randomIntBetween(1, 10);
|
||||
SearchAfterBuilder searchAfterBuilder = new SearchAfterBuilder();
|
||||
Object[] values = new Object[numSearchFrom];
|
||||
|
@ -71,11 +77,14 @@ public class SearchAfterBuilderTests extends ESTestCase {
|
|||
return searchAfterBuilder;
|
||||
}
|
||||
|
||||
// We build a json version of the search_after first in order to
|
||||
// ensure that every number type remain the same before/after xcontent (de)serialization.
|
||||
// This is not a problem because the final type of each field value is extracted from associated sort field.
|
||||
// This little trick ensure that equals and hashcode are the same when using the xcontent serialization.
|
||||
private SearchAfterBuilder randomJsonSearchFromBuilder() throws IOException {
|
||||
/**
|
||||
* We build a json version of the search_after first in order to
|
||||
* ensure that every number type remain the same before/after xcontent (de)serialization.
|
||||
* This is not a problem because the final type of each field value is extracted from associated sort field.
|
||||
* This little trick ensure that equals and hashcode are the same when using the xcontent serialization.
|
||||
*/
|
||||
public static SearchAfterBuilder randomJsonSearchFromBuilder(BiFunction<XContent, BytesReference, XContentParser> createParser)
|
||||
throws IOException {
|
||||
int numSearchAfter = randomIntBetween(1, 10);
|
||||
XContentBuilder jsonBuilder = XContentFactory.jsonBuilder();
|
||||
jsonBuilder.startObject();
|
||||
|
@ -97,7 +106,7 @@ public class SearchAfterBuilderTests extends ESTestCase {
|
|||
}
|
||||
jsonBuilder.endArray();
|
||||
jsonBuilder.endObject();
|
||||
try (XContentParser parser = createParser(JsonXContent.jsonXContent, BytesReference.bytes(jsonBuilder))) {
|
||||
try (XContentParser parser = createParser.apply(JsonXContent.jsonXContent, BytesReference.bytes(jsonBuilder))) {
|
||||
parser.nextToken();
|
||||
parser.nextToken();
|
||||
parser.nextToken();
|
||||
|
@ -128,7 +137,13 @@ public class SearchAfterBuilderTests extends ESTestCase {
|
|||
|
||||
public void testFromXContent() throws Exception {
|
||||
for (int runs = 0; runs < 20; runs++) {
|
||||
SearchAfterBuilder searchAfterBuilder = randomJsonSearchFromBuilder();
|
||||
SearchAfterBuilder searchAfterBuilder = randomJsonSearchFromBuilder((xContent, data) -> {
|
||||
try {
|
||||
return createParser(xContent, data);
|
||||
} catch (IOException ioe) {
|
||||
throw new UncheckedIOException(ioe);
|
||||
}
|
||||
});
|
||||
XContentBuilder builder = XContentFactory.contentBuilder(randomFrom(XContentType.values()));
|
||||
if (randomBoolean()) {
|
||||
builder.prettyPrint();
|
||||
|
|
|
@ -220,7 +220,7 @@ public class KnnSearchBuilderTests extends AbstractXContentSerializingTestCase<K
|
|||
assertThat(((RewriteableQuery) rewritten.filterQueries.get(0)).rewrites, equalTo(1));
|
||||
}
|
||||
|
||||
static float[] randomVector(int dim) {
|
||||
public static float[] randomVector(int dim) {
|
||||
float[] vector = new float[dim];
|
||||
for (int i = 0; i < vector.length; i++) {
|
||||
vector[i] = randomFloat();
|
||||
|
|
|
@ -0,0 +1,93 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0 and the Server Side Public License, v 1; you may not use this file except
|
||||
* in compliance with, at your election, the Elastic License 2.0 or the Server
|
||||
* Side Public License, v 1.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.search.retriever;
|
||||
|
||||
import org.elasticsearch.action.search.SearchRequest;
|
||||
import org.elasticsearch.plugins.SearchPlugin;
|
||||
import org.elasticsearch.search.builder.SearchSourceBuilder;
|
||||
import org.elasticsearch.test.ESTestCase;
|
||||
import org.elasticsearch.xcontent.ConstructingObjectParser;
|
||||
import org.elasticsearch.xcontent.ParseField;
|
||||
import org.elasticsearch.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.xcontent.XContentParser;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Objects;
|
||||
|
||||
import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg;
|
||||
|
||||
/**
|
||||
* Test retriever is used to test parsing of retrievers in plugins where
|
||||
* generation of other random retrievers are not easily accessible through test code.
|
||||
*/
|
||||
public class TestRetrieverBuilder extends RetrieverBuilder {
|
||||
|
||||
/**
|
||||
* Creates a random {@link TestRetrieverBuilder}. The created instance
|
||||
* is not guaranteed to pass {@link SearchRequest} validation. This is purely
|
||||
* for x-content testing.
|
||||
*/
|
||||
public static TestRetrieverBuilder createRandomTestRetrieverBuilder() {
|
||||
return new TestRetrieverBuilder(ESTestCase.randomAlphaOfLengthBetween(5, 10));
|
||||
}
|
||||
|
||||
public static final String NAME = "test";
|
||||
public static final ParseField TEST_FIELD = new ParseField(NAME);
|
||||
public static final SearchPlugin.RetrieverSpec<TestRetrieverBuilder> TEST_SPEC = new SearchPlugin.RetrieverSpec<>(
|
||||
TEST_FIELD,
|
||||
TestRetrieverBuilder::fromXContent
|
||||
);
|
||||
|
||||
public static final ParseField VALUE_FIELD = new ParseField("value");
|
||||
|
||||
public static final ConstructingObjectParser<TestRetrieverBuilder, RetrieverParserContext> PARSER = new ConstructingObjectParser<>(
|
||||
NAME,
|
||||
args -> new TestRetrieverBuilder((String) args[0])
|
||||
);
|
||||
|
||||
static {
|
||||
PARSER.declareString(constructorArg(), VALUE_FIELD);
|
||||
}
|
||||
|
||||
public static TestRetrieverBuilder fromXContent(XContentParser parser, RetrieverParserContext context) {
|
||||
return PARSER.apply(parser, context);
|
||||
}
|
||||
|
||||
private final String value;
|
||||
|
||||
public TestRetrieverBuilder(String value) {
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void extractToSearchSourceBuilder(SearchSourceBuilder searchSourceBuilder, boolean compoundUsed) {
|
||||
throw new UnsupportedOperationException("only used for parsing tests");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
return NAME;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void doToXContent(XContentBuilder builder, Params params) throws IOException {
|
||||
builder.field(VALUE_FIELD.getPreferredName(), value);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean doEquals(Object o) {
|
||||
TestRetrieverBuilder that = (TestRetrieverBuilder) o;
|
||||
return Objects.equals(value, that.value);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int doHashCode() {
|
||||
return Objects.hash(value);
|
||||
}
|
||||
}
|
|
@ -5,6 +5,8 @@
|
|||
* 2.0.
|
||||
*/
|
||||
|
||||
import org.elasticsearch.xpack.rank.rrf.RRFFeatures;
|
||||
|
||||
module org.elasticsearch.rank.rrf {
|
||||
requires org.apache.lucene.core;
|
||||
requires org.elasticsearch.base;
|
||||
|
@ -13,4 +15,6 @@ module org.elasticsearch.rank.rrf {
|
|||
requires org.elasticsearch.xcore;
|
||||
|
||||
exports org.elasticsearch.xpack.rank.rrf;
|
||||
|
||||
provides org.elasticsearch.features.FeatureSpecification with RRFFeatures;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,24 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.xpack.rank.rrf;
|
||||
|
||||
import org.elasticsearch.features.FeatureSpecification;
|
||||
import org.elasticsearch.features.NodeFeature;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* A set of features specifically for the rrf plugin.
|
||||
*/
|
||||
public class RRFFeatures implements FeatureSpecification {
|
||||
|
||||
@Override
|
||||
public Set<NodeFeature> getFeatures() {
|
||||
return Set.of(RRFRetrieverBuilder.RRF_RETRIEVER_SUPPORTED);
|
||||
}
|
||||
}
|
|
@ -41,4 +41,9 @@ public class RRFRankPlugin extends Plugin implements SearchPlugin {
|
|||
public List<NamedXContentRegistry.Entry> getNamedXContent() {
|
||||
return List.of(new NamedXContentRegistry.Entry(RankBuilder.class, new ParseField(NAME), RRFRankBuilder::fromXContent));
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<RetrieverSpec<?>> getRetrievers() {
|
||||
return List.of(new RetrieverSpec<>(new ParseField(NAME), RRFRetrieverBuilder::fromXContent));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,134 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.xpack.rank.rrf;
|
||||
|
||||
import org.elasticsearch.common.ParsingException;
|
||||
import org.elasticsearch.features.NodeFeature;
|
||||
import org.elasticsearch.license.LicenseUtils;
|
||||
import org.elasticsearch.search.builder.SearchSourceBuilder;
|
||||
import org.elasticsearch.search.retriever.RetrieverBuilder;
|
||||
import org.elasticsearch.search.retriever.RetrieverParserContext;
|
||||
import org.elasticsearch.xcontent.ObjectParser;
|
||||
import org.elasticsearch.xcontent.ParseField;
|
||||
import org.elasticsearch.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.xcontent.XContentParser;
|
||||
import org.elasticsearch.xpack.core.XPackPlugin;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
import static org.elasticsearch.xpack.rank.rrf.RRFRankPlugin.NAME;
|
||||
|
||||
/**
|
||||
* An rrf retriever is used to represent an rrf rank element, but
|
||||
* as a tree-like structure. This retriever is a compound retriever
|
||||
* meaning it has a set of child retrievers that each return a set of
|
||||
* top docs that will then be combined and ranked according to the rrf
|
||||
* formula.
|
||||
*/
|
||||
public final class RRFRetrieverBuilder extends RetrieverBuilder {
|
||||
|
||||
public static final NodeFeature RRF_RETRIEVER_SUPPORTED = new NodeFeature("rrf_retriever_supported");
|
||||
|
||||
public static final ParseField RETRIEVERS_FIELD = new ParseField("retrievers");
|
||||
public static final ParseField WINDOW_SIZE_FIELD = new ParseField("window_size");
|
||||
public static final ParseField RANK_CONSTANT_FIELD = new ParseField("rank_constant");
|
||||
|
||||
public static final ObjectParser<RRFRetrieverBuilder, RetrieverParserContext> PARSER = new ObjectParser<>(
|
||||
NAME,
|
||||
RRFRetrieverBuilder::new
|
||||
);
|
||||
|
||||
static {
|
||||
PARSER.declareObjectArray((r, v) -> r.retrieverBuilders = v, (p, c) -> {
|
||||
p.nextToken();
|
||||
String name = p.currentName();
|
||||
RetrieverBuilder retrieverBuilder = p.namedObject(RetrieverBuilder.class, name, c);
|
||||
p.nextToken();
|
||||
return retrieverBuilder;
|
||||
}, RETRIEVERS_FIELD);
|
||||
PARSER.declareInt((r, v) -> r.windowSize = v, WINDOW_SIZE_FIELD);
|
||||
PARSER.declareInt((r, v) -> r.rankConstant = v, RANK_CONSTANT_FIELD);
|
||||
|
||||
RetrieverBuilder.declareBaseParserFields(NAME, PARSER);
|
||||
}
|
||||
|
||||
public static RRFRetrieverBuilder fromXContent(XContentParser parser, RetrieverParserContext context) throws IOException {
|
||||
if (context.clusterSupportsFeature(RRF_RETRIEVER_SUPPORTED) == false) {
|
||||
throw new ParsingException(parser.getTokenLocation(), "unknown retriever [" + NAME + "]");
|
||||
}
|
||||
if (RRFRankPlugin.RANK_RRF_FEATURE.check(XPackPlugin.getSharedLicenseState()) == false) {
|
||||
throw LicenseUtils.newComplianceException("Reciprocal Rank Fusion (RRF)");
|
||||
}
|
||||
return PARSER.apply(parser, context);
|
||||
}
|
||||
|
||||
List<RetrieverBuilder> retrieverBuilders = Collections.emptyList();
|
||||
int windowSize = RRFRankBuilder.DEFAULT_WINDOW_SIZE;
|
||||
int rankConstant = RRFRankBuilder.DEFAULT_RANK_CONSTANT;
|
||||
|
||||
@Override
|
||||
public void extractToSearchSourceBuilder(SearchSourceBuilder searchSourceBuilder, boolean compoundUsed) {
|
||||
if (compoundUsed) {
|
||||
throw new IllegalArgumentException("[rank] cannot be used in children of compound retrievers");
|
||||
}
|
||||
|
||||
for (RetrieverBuilder retrieverBuilder : retrieverBuilders) {
|
||||
if (preFilterQueryBuilders.isEmpty() == false) {
|
||||
retrieverBuilder.getPreFilterQueryBuilders().addAll(preFilterQueryBuilders);
|
||||
}
|
||||
|
||||
retrieverBuilder.extractToSearchSourceBuilder(searchSourceBuilder, true);
|
||||
}
|
||||
|
||||
searchSourceBuilder.rankBuilder(new RRFRankBuilder(windowSize, rankConstant));
|
||||
}
|
||||
|
||||
// ---- FOR TESTING XCONTENT PARSING ----
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
return NAME;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void doToXContent(XContentBuilder builder, Params params) throws IOException {
|
||||
if (retrieverBuilders.isEmpty() == false) {
|
||||
builder.startArray(RETRIEVERS_FIELD.getPreferredName());
|
||||
|
||||
for (RetrieverBuilder retrieverBuilder : retrieverBuilders) {
|
||||
builder.startObject();
|
||||
builder.field(retrieverBuilder.getName());
|
||||
retrieverBuilder.toXContent(builder, params);
|
||||
builder.endObject();
|
||||
}
|
||||
|
||||
builder.endArray();
|
||||
}
|
||||
|
||||
builder.field(WINDOW_SIZE_FIELD.getPreferredName(), windowSize);
|
||||
builder.field(RANK_CONSTANT_FIELD.getPreferredName(), rankConstant);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean doEquals(Object o) {
|
||||
RRFRetrieverBuilder that = (RRFRetrieverBuilder) o;
|
||||
return windowSize == that.windowSize
|
||||
&& rankConstant == that.rankConstant
|
||||
&& Objects.equals(retrieverBuilders, that.retrieverBuilders);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int doHashCode() {
|
||||
return Objects.hash(retrieverBuilders, windowSize, rankConstant);
|
||||
}
|
||||
|
||||
// ---- END FOR TESTING ----
|
||||
}
|
|
@ -0,0 +1,8 @@
|
|||
#
|
||||
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
# or more contributor license agreements. Licensed under the Elastic License
|
||||
# 2.0; you may not use this file except in compliance with the Elastic License
|
||||
# 2.0.
|
||||
#
|
||||
|
||||
org.elasticsearch.xpack.rank.rrf.RRFFeatures
|
|
@ -0,0 +1,88 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.xpack.rank.rrf;
|
||||
|
||||
import org.elasticsearch.action.search.SearchRequest;
|
||||
import org.elasticsearch.search.retriever.RetrieverBuilder;
|
||||
import org.elasticsearch.search.retriever.RetrieverParserContext;
|
||||
import org.elasticsearch.search.retriever.TestRetrieverBuilder;
|
||||
import org.elasticsearch.test.AbstractXContentTestCase;
|
||||
import org.elasticsearch.usage.SearchUsage;
|
||||
import org.elasticsearch.xcontent.NamedXContentRegistry;
|
||||
import org.elasticsearch.xcontent.ParseField;
|
||||
import org.elasticsearch.xcontent.XContentParser;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class RRFRetrieverBuilderParsingTests extends AbstractXContentTestCase<RRFRetrieverBuilder> {
|
||||
|
||||
/**
|
||||
* Creates a random {@link RRFRetrieverBuilder}. The created instance
|
||||
* is not guaranteed to pass {@link SearchRequest} validation. This is purely
|
||||
* for x-content testing.
|
||||
*/
|
||||
public static RRFRetrieverBuilder createRandomRRFRetrieverBuilder() {
|
||||
RRFRetrieverBuilder rrfRetrieverBuilder = new RRFRetrieverBuilder();
|
||||
|
||||
if (randomBoolean()) {
|
||||
rrfRetrieverBuilder.windowSize = randomIntBetween(1, 10000);
|
||||
}
|
||||
|
||||
if (randomBoolean()) {
|
||||
rrfRetrieverBuilder.rankConstant = randomIntBetween(1, 1000000);
|
||||
}
|
||||
|
||||
int retrieverCount = randomIntBetween(2, 50);
|
||||
rrfRetrieverBuilder.retrieverBuilders = new ArrayList<>(retrieverCount);
|
||||
|
||||
while (retrieverCount > 0) {
|
||||
rrfRetrieverBuilder.retrieverBuilders.add(TestRetrieverBuilder.createRandomTestRetrieverBuilder());
|
||||
--retrieverCount;
|
||||
}
|
||||
|
||||
return rrfRetrieverBuilder;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected RRFRetrieverBuilder createTestInstance() {
|
||||
return createRandomRRFRetrieverBuilder();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected RRFRetrieverBuilder doParseInstance(XContentParser parser) throws IOException {
|
||||
return RRFRetrieverBuilder.PARSER.apply(parser, new RetrieverParserContext(new SearchUsage(), nf -> true));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean supportsUnknownFields() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected NamedXContentRegistry xContentRegistry() {
|
||||
List<NamedXContentRegistry.Entry> entries = new ArrayList<>();
|
||||
entries.add(
|
||||
new NamedXContentRegistry.Entry(
|
||||
RetrieverBuilder.class,
|
||||
TestRetrieverBuilder.TEST_SPEC.getName(),
|
||||
(p, c) -> TestRetrieverBuilder.TEST_SPEC.getParser().fromXContent(p, (RetrieverParserContext) c),
|
||||
TestRetrieverBuilder.TEST_SPEC.getName().getForRestApiVersion()
|
||||
)
|
||||
);
|
||||
entries.add(
|
||||
new NamedXContentRegistry.Entry(
|
||||
RetrieverBuilder.class,
|
||||
new ParseField(RRFRankPlugin.NAME),
|
||||
(p, c) -> RRFRetrieverBuilder.PARSER.apply(p, (RetrieverParserContext) c)
|
||||
)
|
||||
);
|
||||
return new NamedXContentRegistry(entries);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,151 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.xpack.rank.rrf;
|
||||
|
||||
import org.elasticsearch.common.ParsingException;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.features.NodeFeature;
|
||||
import org.elasticsearch.search.SearchModule;
|
||||
import org.elasticsearch.search.builder.SearchSourceBuilder;
|
||||
import org.elasticsearch.search.retriever.RetrieverBuilder;
|
||||
import org.elasticsearch.search.retriever.RetrieverParserContext;
|
||||
import org.elasticsearch.test.ESTestCase;
|
||||
import org.elasticsearch.xcontent.NamedXContentRegistry;
|
||||
import org.elasticsearch.xcontent.ParseField;
|
||||
import org.elasticsearch.xcontent.XContentParser;
|
||||
import org.elasticsearch.xcontent.json.JsonXContent;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
/** Tests for the rrf retriever. */
|
||||
public class RRFRetrieverBuilderTests extends ESTestCase {
|
||||
|
||||
/** Tests the rrf retriever validates on its own {@link NodeFeature} */
|
||||
public void testRetrieverVersions() throws IOException {
|
||||
try (XContentParser parser = createParser(JsonXContent.jsonXContent, "{\"retriever\":{\"rrf\":{}}}")) {
|
||||
SearchSourceBuilder ssb = new SearchSourceBuilder();
|
||||
ParsingException iae = expectThrows(
|
||||
ParsingException.class,
|
||||
() -> ssb.parseXContent(parser, true, nf -> nf == RetrieverBuilder.RETRIEVERS_SUPPORTED)
|
||||
);
|
||||
assertEquals("unknown retriever [rrf]", iae.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
/** Tests extraction errors related to compound retrievers. These tests require a compound retriever which is why they are here. */
|
||||
public void testRetrieverExtractionErrors() throws IOException {
|
||||
try (
|
||||
XContentParser parser = createParser(
|
||||
JsonXContent.jsonXContent,
|
||||
"{\"retriever\":{\"rrf_nl\":{\"retrievers\":"
|
||||
+ "[{\"standard\":{\"search_after\":[1]}},{\"standard\":{\"search_after\":[2]}}]}}}"
|
||||
)
|
||||
) {
|
||||
SearchSourceBuilder ssb = new SearchSourceBuilder();
|
||||
IllegalArgumentException iae = expectThrows(IllegalArgumentException.class, () -> ssb.parseXContent(parser, true, nf -> true));
|
||||
assertEquals("[search_after] cannot be used in children of compound retrievers", iae.getMessage());
|
||||
}
|
||||
|
||||
try (
|
||||
XContentParser parser = createParser(
|
||||
JsonXContent.jsonXContent,
|
||||
"{\"retriever\":{\"rrf_nl\":{\"retrievers\":"
|
||||
+ "[{\"standard\":{\"terminate_after\":1}},{\"standard\":{\"terminate_after\":2}}]}}}"
|
||||
)
|
||||
) {
|
||||
SearchSourceBuilder ssb = new SearchSourceBuilder();
|
||||
IllegalArgumentException iae = expectThrows(IllegalArgumentException.class, () -> ssb.parseXContent(parser, true, nf -> true));
|
||||
assertEquals("[terminate_after] cannot be used in children of compound retrievers", iae.getMessage());
|
||||
}
|
||||
|
||||
try (
|
||||
XContentParser parser = createParser(
|
||||
JsonXContent.jsonXContent,
|
||||
"{\"retriever\":{\"rrf_nl\":{\"retrievers\":" + "[{\"standard\":{\"sort\":[\"f1\"]}},{\"standard\":{\"sort\":[\"f2\"]}}]}}}"
|
||||
)
|
||||
) {
|
||||
SearchSourceBuilder ssb = new SearchSourceBuilder();
|
||||
IllegalArgumentException iae = expectThrows(IllegalArgumentException.class, () -> ssb.parseXContent(parser, true, nf -> true));
|
||||
assertEquals("[sort] cannot be used in children of compound retrievers", iae.getMessage());
|
||||
}
|
||||
|
||||
try (
|
||||
XContentParser parser = createParser(
|
||||
JsonXContent.jsonXContent,
|
||||
"{\"retriever\":{\"rrf_nl\":{\"retrievers\":" + "[{\"standard\":{\"min_score\":1}},{\"standard\":{\"min_score\":2}}]}}}"
|
||||
)
|
||||
) {
|
||||
SearchSourceBuilder ssb = new SearchSourceBuilder();
|
||||
IllegalArgumentException iae = expectThrows(IllegalArgumentException.class, () -> ssb.parseXContent(parser, true, nf -> true));
|
||||
assertEquals("[min_score] cannot be used in children of compound retrievers", iae.getMessage());
|
||||
}
|
||||
|
||||
try (
|
||||
XContentParser parser = createParser(
|
||||
JsonXContent.jsonXContent,
|
||||
"{\"retriever\":{\"rrf_nl\":{\"retrievers\":"
|
||||
+ "[{\"standard\":{\"collapse\":{\"field\":\"f0\"}}},{\"standard\":{\"collapse\":{\"field\":\"f1\"}}}]}}}"
|
||||
)
|
||||
) {
|
||||
SearchSourceBuilder ssb = new SearchSourceBuilder();
|
||||
IllegalArgumentException iae = expectThrows(IllegalArgumentException.class, () -> ssb.parseXContent(parser, true, nf -> true));
|
||||
assertEquals("[collapse] cannot be used in children of compound retrievers", iae.getMessage());
|
||||
}
|
||||
|
||||
try (
|
||||
XContentParser parser = createParser(
|
||||
JsonXContent.jsonXContent,
|
||||
"{\"retriever\":{\"rrf_nl\":{\"retrievers\":[{\"rrf_nl\":{}}]}}}"
|
||||
)
|
||||
) {
|
||||
SearchSourceBuilder ssb = new SearchSourceBuilder();
|
||||
IllegalArgumentException iae = expectThrows(IllegalArgumentException.class, () -> ssb.parseXContent(parser, true, nf -> true));
|
||||
assertEquals("[rank] cannot be used in children of compound retrievers", iae.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
/** Tests max depth errors related to compound retrievers. These tests require a compound retriever which is why they are here. */
|
||||
public void testRetrieverBuilderParsingMaxDepth() throws IOException {
|
||||
try (
|
||||
XContentParser parser = createParser(
|
||||
JsonXContent.jsonXContent,
|
||||
"{\"retriever\":{\"rrf_nl\":{\"retrievers\":[{\"rrf_nl\":{\"retrievers\":[{\"standard\":{}}]}}]}}}"
|
||||
)
|
||||
) {
|
||||
SearchSourceBuilder ssb = new SearchSourceBuilder();
|
||||
IllegalArgumentException iae = expectThrows(IllegalArgumentException.class, () -> ssb.parseXContent(parser, true, nf -> true));
|
||||
assertEquals("[1:65] [rrf] failed to parse field [retrievers]", iae.getMessage());
|
||||
assertEquals(
|
||||
"the nested depth of the [standard] retriever exceeds the maximum nested depth [2] for retrievers",
|
||||
iae.getCause().getCause().getMessage()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected NamedXContentRegistry xContentRegistry() {
|
||||
List<NamedXContentRegistry.Entry> entries = new SearchModule(Settings.EMPTY, List.of()).getNamedXContents();
|
||||
entries.add(
|
||||
new NamedXContentRegistry.Entry(
|
||||
RetrieverBuilder.class,
|
||||
new ParseField(RRFRankPlugin.NAME),
|
||||
(p, c) -> RRFRetrieverBuilder.fromXContent(p, (RetrieverParserContext) c)
|
||||
)
|
||||
);
|
||||
// Add an entry with no license requirement for unit testing
|
||||
entries.add(
|
||||
new NamedXContentRegistry.Entry(
|
||||
RetrieverBuilder.class,
|
||||
new ParseField(RRFRankPlugin.NAME + "_nl"),
|
||||
(p, c) -> RRFRetrieverBuilder.PARSER.apply(p, (RetrieverParserContext) c)
|
||||
)
|
||||
);
|
||||
return new NamedXContentRegistry(entries);
|
||||
}
|
||||
}
|
|
@ -49,7 +49,7 @@ setup:
|
|||
indices.refresh: {}
|
||||
|
||||
---
|
||||
"RRF Invalid License":
|
||||
"rrf invalid license":
|
||||
|
||||
- do:
|
||||
catch: forbidden
|
||||
|
@ -75,3 +75,39 @@ setup:
|
|||
- match: { status: 403 }
|
||||
- match: { error.type: security_exception }
|
||||
- match: { error.reason: "current license is non-compliant for [Reciprocal Rank Fusion (RRF)]" }
|
||||
|
||||
---
|
||||
"rrf retriever invalid license":
|
||||
|
||||
- do:
|
||||
catch: forbidden
|
||||
search:
|
||||
index: test
|
||||
body:
|
||||
track_total_hits: false
|
||||
fields: [ "text" ]
|
||||
retriever:
|
||||
rrf:
|
||||
retrievers: [
|
||||
{
|
||||
knn: {
|
||||
field: vector,
|
||||
query_vector: [ 0.0 ],
|
||||
k: 3,
|
||||
num_candidates: 3
|
||||
}
|
||||
},
|
||||
{
|
||||
standard: {
|
||||
query: {
|
||||
term: {
|
||||
text: term
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
- match: { status: 403 }
|
||||
- match: { error.type: security_exception }
|
||||
- match: { error.reason: "current license is non-compliant for [Reciprocal Rank Fusion (RRF)]" }
|
||||
|
|
|
@ -0,0 +1,331 @@
|
|||
setup:
|
||||
- skip:
|
||||
version: ' - 8.12.99'
|
||||
reason: 'rrf retriever added in 8.13'
|
||||
|
||||
- do:
|
||||
indices.create:
|
||||
index: test
|
||||
body:
|
||||
settings:
|
||||
number_of_shards: 5
|
||||
number_of_replicas: 0
|
||||
mappings:
|
||||
properties:
|
||||
text:
|
||||
type: text
|
||||
keyword:
|
||||
type: keyword
|
||||
vector:
|
||||
type: dense_vector
|
||||
dims: 1
|
||||
index: true
|
||||
similarity: l2_norm
|
||||
|
||||
- do:
|
||||
index:
|
||||
index: test
|
||||
id: "1"
|
||||
body:
|
||||
text: "term term"
|
||||
keyword: "other"
|
||||
vector: [0.0]
|
||||
|
||||
- do:
|
||||
index:
|
||||
index: test
|
||||
id: "2"
|
||||
body:
|
||||
text: "other"
|
||||
keyword: "other"
|
||||
vector: [1.0]
|
||||
|
||||
- do:
|
||||
index:
|
||||
index: test
|
||||
id: "3"
|
||||
body:
|
||||
text: "term"
|
||||
keyword: "keyword"
|
||||
vector: [2.0]
|
||||
|
||||
- do:
|
||||
indices.refresh: {}
|
||||
|
||||
---
|
||||
"rrf retriever with a standard retriever and a knn retriever":
|
||||
|
||||
- do:
|
||||
search:
|
||||
index: test
|
||||
body:
|
||||
track_total_hits: false
|
||||
fields: [ "text", "keyword" ]
|
||||
retriever:
|
||||
rrf:
|
||||
retrievers: [
|
||||
{
|
||||
knn: {
|
||||
field: vector,
|
||||
query_vector: [ 0.0 ],
|
||||
k: 3,
|
||||
num_candidates: 3
|
||||
}
|
||||
},
|
||||
{
|
||||
standard: {
|
||||
query: {
|
||||
term: {
|
||||
text: term
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
window_size: 100
|
||||
rank_constant: 1
|
||||
size: 10
|
||||
|
||||
- match: { hits.hits.0._id: "1" }
|
||||
- match: { hits.hits.0._rank: 1 }
|
||||
- match: { hits.hits.0.fields.text.0: "term term" }
|
||||
- match: { hits.hits.0.fields.keyword.0: "other" }
|
||||
|
||||
- match: { hits.hits.1._id: "3" }
|
||||
- match: { hits.hits.1._rank: 2 }
|
||||
- match: { hits.hits.1.fields.text.0: "term" }
|
||||
- match: { hits.hits.1.fields.keyword.0: "keyword" }
|
||||
|
||||
- match: { hits.hits.2._id: "2" }
|
||||
- match: { hits.hits.2._rank: 3 }
|
||||
- match: { hits.hits.2.fields.text.0: "other" }
|
||||
- match: { hits.hits.2.fields.keyword.0: "other" }
|
||||
|
||||
---
|
||||
"rrf retriever with multiple standard retrievers":
|
||||
|
||||
- do:
|
||||
search:
|
||||
index: test
|
||||
body:
|
||||
track_total_hits: true
|
||||
fields: [ "text", "keyword" ]
|
||||
retriever:
|
||||
rrf:
|
||||
retrievers: [
|
||||
{
|
||||
"standard": {
|
||||
"query": {
|
||||
"term": {
|
||||
"text": "term"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"standard": {
|
||||
"query": {
|
||||
"match": {
|
||||
"keyword": "keyword"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
window_size: 100
|
||||
rank_constant: 1
|
||||
size: 10
|
||||
|
||||
- match: { hits.total.value : 2 }
|
||||
|
||||
- match: { hits.hits.0._id: "3" }
|
||||
- match: { hits.hits.0._rank: 1 }
|
||||
- match: { hits.hits.0.fields.text.0: "term" }
|
||||
- match: { hits.hits.0.fields.keyword.0: "keyword" }
|
||||
|
||||
- match: { hits.hits.1._id: "1" }
|
||||
- match: { hits.hits.1._rank: 2 }
|
||||
- match: { hits.hits.1.fields.text.0: "term term" }
|
||||
- match: { hits.hits.1.fields.keyword.0: "other" }
|
||||
|
||||
---
|
||||
"rrf retriever with multiple standard retrievers and a knn retriever":
|
||||
|
||||
- do:
|
||||
search:
|
||||
index: test
|
||||
body:
|
||||
track_total_hits: true
|
||||
fields: [ "text", "keyword" ]
|
||||
retriever:
|
||||
rrf:
|
||||
retrievers: [
|
||||
{
|
||||
knn: {
|
||||
field: vector,
|
||||
query_vector: [ 0.0 ],
|
||||
k: 3,
|
||||
num_candidates: 3
|
||||
}
|
||||
},
|
||||
{
|
||||
"standard": {
|
||||
"query": {
|
||||
"term": {
|
||||
"text": "term"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"standard": {
|
||||
"query": {
|
||||
"match": {
|
||||
"keyword": "keyword"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
window_size: 100
|
||||
rank_constant: 1
|
||||
size: 10
|
||||
|
||||
- match: { hits.total.value : 3 }
|
||||
|
||||
- match: { hits.hits.0._id: "3" }
|
||||
- match: { hits.hits.0._rank: 1 }
|
||||
- match: { hits.hits.0.fields.text.0: "term" }
|
||||
- match: { hits.hits.0.fields.keyword.0: "keyword" }
|
||||
|
||||
- match: { hits.hits.1._id: "1" }
|
||||
- match: { hits.hits.1._rank: 2 }
|
||||
- match: { hits.hits.1.fields.text.0: "term term" }
|
||||
- match: { hits.hits.1.fields.keyword.0: "other" }
|
||||
|
||||
- match: { hits.hits.2._id: "2" }
|
||||
- match: { hits.hits.2._rank: 3 }
|
||||
- match: { hits.hits.2.fields.text.0: "other" }
|
||||
- match: { hits.hits.2.fields.keyword.0: "other" }
|
||||
|
||||
---
|
||||
"rrf retriever with multiple standard retrievers and multiple knn retriever":
|
||||
|
||||
- do:
|
||||
search:
|
||||
size: 1
|
||||
index: test
|
||||
body:
|
||||
track_total_hits: true
|
||||
fields: [ "text", "keyword" ]
|
||||
retriever:
|
||||
rrf:
|
||||
retrievers: [
|
||||
{
|
||||
knn: {
|
||||
field: vector,
|
||||
query_vector: [ 0.0 ],
|
||||
k: 3,
|
||||
num_candidates: 3
|
||||
}
|
||||
},
|
||||
{
|
||||
knn: {
|
||||
field: vector,
|
||||
query_vector: [ 1.0 ],
|
||||
k: 3,
|
||||
num_candidates: 3
|
||||
}
|
||||
},
|
||||
{
|
||||
"standard": {
|
||||
"query": {
|
||||
"term": {
|
||||
"text": "term"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"standard": {
|
||||
"query": {
|
||||
"match": {
|
||||
"keyword": "keyword"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
window_size: 2
|
||||
rank_constant: 1
|
||||
|
||||
- match: { hits.total.value : 3 }
|
||||
- length: { hits.hits: 1 }
|
||||
|
||||
- match: { hits.hits.0._id: "3" }
|
||||
- match: { hits.hits.0._rank: 1 }
|
||||
- match: { hits.hits.0.fields.text.0: "term" }
|
||||
- match: { hits.hits.0.fields.keyword.0: "keyword" }
|
||||
|
||||
---
|
||||
"rrf retriever with multiple standard retrievers and multiple knn retriever and a filter":
|
||||
|
||||
- do:
|
||||
search:
|
||||
index: test
|
||||
body:
|
||||
track_total_hits: true
|
||||
fields: [ "text", "keyword" ]
|
||||
retriever:
|
||||
rrf:
|
||||
filter: [
|
||||
{
|
||||
term: {
|
||||
keyword: "keyword"
|
||||
}
|
||||
}
|
||||
]
|
||||
retrievers: [
|
||||
{
|
||||
knn: {
|
||||
field: vector,
|
||||
query_vector: [ 0.0 ],
|
||||
k: 3,
|
||||
num_candidates: 3
|
||||
}
|
||||
},
|
||||
{
|
||||
knn: {
|
||||
field: vector,
|
||||
query_vector: [ 1.0 ],
|
||||
k: 3,
|
||||
num_candidates: 3
|
||||
}
|
||||
},
|
||||
{
|
||||
"standard": {
|
||||
"query": {
|
||||
"term": {
|
||||
"text": "term"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"standard": {
|
||||
"query": {
|
||||
"match": {
|
||||
"keyword": "keyword"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
- match: { hits.total.value : 1 }
|
||||
- length: { hits.hits: 1 }
|
||||
|
||||
- match: { hits.hits.0._id: "3" }
|
||||
- match: { hits.hits.0._rank: 1 }
|
||||
- match: { hits.hits.0.fields.text.0: "term" }
|
||||
- match: { hits.hits.0.fields.keyword.0: "keyword" }
|
|
@ -0,0 +1,342 @@
|
|||
setup:
|
||||
- skip:
|
||||
features: close_to
|
||||
version: ' - 8.13.99'
|
||||
reason: 'rrf retriever added in 8.14'
|
||||
|
||||
- do:
|
||||
indices.create:
|
||||
index: test
|
||||
body:
|
||||
settings:
|
||||
number_of_shards: 5
|
||||
number_of_replicas: 0
|
||||
mappings:
|
||||
properties:
|
||||
vector_asc:
|
||||
type: dense_vector
|
||||
dims: 1
|
||||
index: true
|
||||
similarity: l2_norm
|
||||
vector_desc:
|
||||
type: dense_vector
|
||||
dims: 1
|
||||
index: true
|
||||
similarity: l2_norm
|
||||
int:
|
||||
type: integer
|
||||
text:
|
||||
type: text
|
||||
|
||||
- do:
|
||||
bulk:
|
||||
index: test
|
||||
refresh: true
|
||||
body: |
|
||||
{ "index": {"_id" : "1"} }
|
||||
{ "vector_asc": [1.0], "vector_desc": [11.0], "int": 1, "text": "term 1" }
|
||||
{ "index": {"_id" : "2"} }
|
||||
{ "vector_asc": [2.0], "vector_desc": [10.0], "int": 2, "text": "term 2" }
|
||||
{ "index": {"_id" : "3"} }
|
||||
{ "vector_asc": [3.0], "vector_desc": [9.0], "int": 3, "text": "term 3" }
|
||||
{ "index": {"_id" : "4"} }
|
||||
{ "vector_asc": [4.0], "vector_desc": [8.0], "int": 1, "text": "term 4" }
|
||||
{ "index": {"_id" : "5"} }
|
||||
{ "vector_asc": [5.0], "vector_desc": [7.0], "int": 2, "text": "term 5" }
|
||||
{ "index": {"_id" : "6"} }
|
||||
{ "vector_asc": [6.0], "vector_desc": [6.0], "int": 3, "text": "term 6" }
|
||||
{ "index": {"_id" : "7"} }
|
||||
{ "vector_asc": [7.0], "vector_desc": [5.0], "int": 1, "text": "term 7" }
|
||||
{ "index": {"_id" : "8"} }
|
||||
{ "vector_asc": [8.0], "vector_desc": [4.0], "int": 2, "text": "term 8" }
|
||||
{ "index": {"_id" : "9"} }
|
||||
{ "vector_asc": [9.0], "vector_desc": [3.0], "int": 3, "text": "term 9" }
|
||||
{ "index": {"_id" : "10"} }
|
||||
{ "vector_asc": [10.0], "vector_desc": [2.0], "int": 1, "text": "term 10" }
|
||||
{ "index": {"_id" : "11"} }
|
||||
{ "vector_asc": [11.0], "vector_desc": [1.0], "int": 2, "text": "term 11" }
|
||||
|
||||
---
|
||||
"rrf retriever using a knn retriever and a standard retriever with a scripted metric aggregation":
|
||||
|
||||
- do:
|
||||
search:
|
||||
index: test
|
||||
body:
|
||||
size: 5
|
||||
track_total_hits: true
|
||||
fields: [ "text" ]
|
||||
retriever:
|
||||
rrf:
|
||||
retrievers: [
|
||||
{
|
||||
knn: {
|
||||
field: vector_asc,
|
||||
query_vector: [ 5.0 ],
|
||||
k: 5,
|
||||
num_candidates: 11
|
||||
}
|
||||
},
|
||||
{
|
||||
"standard": {
|
||||
query: {
|
||||
bool: {
|
||||
should: [
|
||||
{
|
||||
term: {
|
||||
text: {
|
||||
value: "6",
|
||||
boost: 10.0
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
term: {
|
||||
text: {
|
||||
value: "5",
|
||||
boost: 7.0
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
term: {
|
||||
text: {
|
||||
value: "7",
|
||||
boost: 7.0
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
term: {
|
||||
text: {
|
||||
value: "4",
|
||||
boost: 3.0
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
term: {
|
||||
text: {
|
||||
value: "3",
|
||||
boost: 2.0
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
window_size: 100
|
||||
rank_constant: 1
|
||||
aggs:
|
||||
sums:
|
||||
scripted_metric:
|
||||
init_script: |
|
||||
state['sums'] = ['asc': [], 'text': []]
|
||||
map_script: |
|
||||
state['sums']['asc'].add($('vector_asc', null).getVector()[0]);
|
||||
state['sums']['text'].add(Integer.parseInt($('text', null).substring(5)));
|
||||
combine_script: |
|
||||
[
|
||||
'asc_total': state['sums']['asc'].stream().mapToDouble(v -> v).sum(),
|
||||
'text_total': state['sums']['text'].stream().mapToInt(v -> v).sum()
|
||||
]
|
||||
reduce_script: |
|
||||
[
|
||||
'asc_total': states.stream().mapToDouble(v -> v['asc_total']).sum(),
|
||||
'text_total': states.stream().mapToInt(v -> v['text_total']).sum()
|
||||
]
|
||||
|
||||
- match: { hits.hits.0._id: "5" }
|
||||
- match: { hits.hits.0._rank: 1 }
|
||||
|
||||
- match: { hits.hits.1._id: "6" }
|
||||
- match: { hits.hits.1._rank: 2 }
|
||||
|
||||
- match: { hits.hits.2._id: "4" }
|
||||
- match: { hits.hits.2._rank: 3 }
|
||||
|
||||
- match: { hits.hits.3._id: "7" }
|
||||
- match: { hits.hits.3._rank: 4 }
|
||||
|
||||
- match: { hits.hits.4._id: "3" }
|
||||
- match: { hits.hits.4._rank: 5 }
|
||||
|
||||
- close_to: { aggregations.sums.value.asc_total: { value: 25.0, error: 0.001 }}
|
||||
- match: { aggregations.sums.value.text_total: 25 }
|
||||
|
||||
---
|
||||
"rrf retriever using multiple knn retrievers with a scripted metric aggregation":
|
||||
|
||||
- do:
|
||||
search:
|
||||
index: test
|
||||
body:
|
||||
size: 1
|
||||
track_total_hits: true
|
||||
fields: [ "text" ]
|
||||
retriever:
|
||||
rrf:
|
||||
retrievers: [
|
||||
{
|
||||
knn: {
|
||||
field: vector_asc,
|
||||
query_vector: [ 6.0 ],
|
||||
k: 5,
|
||||
num_candidates: 11
|
||||
}
|
||||
},
|
||||
{
|
||||
knn: {
|
||||
field: vector_desc,
|
||||
query_vector: [ 8.0 ],
|
||||
k: 3,
|
||||
num_candidates: 11
|
||||
}
|
||||
}
|
||||
]
|
||||
window_size: 3
|
||||
rank_constant: 1
|
||||
aggs:
|
||||
sums:
|
||||
scripted_metric:
|
||||
init_script: |
|
||||
state['sums'] = ['asc': [], 'desc': []]
|
||||
map_script: |
|
||||
state['sums']['asc'].add($('vector_asc', null).getVector()[0]);
|
||||
state['sums']['desc'].add($('vector_desc', null).getVector()[0])
|
||||
combine_script: |
|
||||
[
|
||||
'asc_total': state['sums']['asc'].stream().mapToDouble(v -> v).sum(),
|
||||
'desc_total': state['sums']['desc'].stream().mapToDouble(v -> v).sum()
|
||||
]
|
||||
reduce_script: |
|
||||
[
|
||||
'asc_total': states.stream().mapToDouble(v -> v['asc_total']).sum(),
|
||||
'desc_total': states.stream().mapToDouble(v -> v['desc_total']).sum()
|
||||
]
|
||||
|
||||
- match: { hits.total.value: 6 }
|
||||
|
||||
- match: { hits.hits.0._id: "5" }
|
||||
- match: { hits.hits.0._rank: 1 }
|
||||
|
||||
- close_to: { aggregations.sums.value.asc_total: { value: 33.0, error: 0.001 }}
|
||||
- close_to: { aggregations.sums.value.desc_total: { value: 39.0, error: 0.001 }}
|
||||
|
||||
---
|
||||
"rrf retriever using multiple knn retrievers and a standard retriever with a scripted metric aggregation":
|
||||
|
||||
- do:
|
||||
search:
|
||||
index: test
|
||||
body:
|
||||
size: 5
|
||||
track_total_hits: true
|
||||
fields: [ "text" ]
|
||||
retriever:
|
||||
rrf:
|
||||
retrievers: [
|
||||
{
|
||||
knn: {
|
||||
field: vector_asc,
|
||||
query_vector: [ 6.0 ],
|
||||
k: 5,
|
||||
num_candidates: 11
|
||||
}
|
||||
},
|
||||
{
|
||||
knn: {
|
||||
field: vector_desc,
|
||||
query_vector: [ 6.0 ],
|
||||
k: 5,
|
||||
num_candidates: 11
|
||||
}
|
||||
},
|
||||
{
|
||||
standard: {
|
||||
query: {
|
||||
bool: {
|
||||
should: [
|
||||
{
|
||||
term: {
|
||||
text: {
|
||||
value: "6",
|
||||
boost: 10.0
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
term: {
|
||||
text: {
|
||||
value: "5",
|
||||
boost: 7.0
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
term: {
|
||||
text: {
|
||||
value: "7",
|
||||
boost: 7.0
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
term: {
|
||||
text: {
|
||||
value: "4",
|
||||
boost: 3.0
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
window_size: 100
|
||||
rank_constant: 1
|
||||
aggs:
|
||||
sums:
|
||||
scripted_metric:
|
||||
init_script: |
|
||||
state['sums'] = ['asc': [], 'desc': [], 'text': []]
|
||||
map_script: |
|
||||
state['sums']['asc'].add($('vector_asc', null).getVector()[0]);
|
||||
state['sums']['desc'].add($('vector_asc', null).getVector()[0]);
|
||||
state['sums']['text'].add(Integer.parseInt($('text', null).substring(5)));
|
||||
combine_script: |
|
||||
[
|
||||
'asc_total': state['sums']['asc'].stream().mapToDouble(v -> v).sum(),
|
||||
'desc_total': state['sums']['asc'].stream().mapToDouble(v -> v).sum(),
|
||||
'text_total': state['sums']['text'].stream().mapToInt(v -> v).sum()
|
||||
]
|
||||
reduce_script: |
|
||||
[
|
||||
'asc_total': states.stream().mapToDouble(v -> v['asc_total']).sum(),
|
||||
'desc_total': states.stream().mapToDouble(v -> v['desc_total']).sum(),
|
||||
'text_total': states.stream().mapToInt(v -> v['text_total']).sum()
|
||||
]
|
||||
|
||||
- match: { hits.hits.0._id: "6" }
|
||||
- match: { hits.hits.0._rank: 1 }
|
||||
|
||||
- match: { hits.hits.1._id: "5" }
|
||||
- match: { hits.hits.1._rank: 2 }
|
||||
|
||||
- match: { hits.hits.2._id: "7" }
|
||||
- match: { hits.hits.2._rank: 3 }
|
||||
|
||||
- match: { hits.hits.3._id: "4" }
|
||||
- match: { hits.hits.3._rank: 4 }
|
||||
|
||||
- match: { hits.hits.4._id: "8" }
|
||||
- match: { hits.hits.4._rank: 5 }
|
||||
|
||||
- close_to: { aggregations.sums.value.asc_total: { value: 30.0, error: 0.001 }}
|
||||
- close_to: { aggregations.sums.value.desc_total: { value: 30.0, error: 0.001 }}
|
||||
- match: { aggregations.sums.value.text_total: 30 }
|
Loading…
Add table
Add a link
Reference in a new issue