Merge branch 'main' into main-update-9-10-24

This commit is contained in:
Simon Cooper 2024-10-09 17:08:05 +01:00
commit 09f91cdaec
385 changed files with 20490 additions and 6852 deletions

View file

@ -20,7 +20,11 @@ dependencies {
compileOnly project(path: xpackModule('core'))
testImplementation(testArtifact(project(xpackModule('core'))))
testImplementation(testArtifact(project(':server')))
clusterModules project(xpackModule('rank-rrf'))
clusterModules project(xpackModule('inference'))
clusterModules project(':modules:lang-painless')
clusterPlugins project(':x-pack:plugin:inference:qa:test-service-plugin')
}

View file

@ -33,7 +33,6 @@ import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS;
import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
@ -98,7 +97,7 @@ public class RRFRetrieverBuilderIT extends ESIntegTestCase {
}
}
""";
createIndex(INDEX, Settings.builder().put(SETTING_NUMBER_OF_SHARDS, 1).put(SETTING_NUMBER_OF_REPLICAS, 0).build());
createIndex(INDEX, Settings.builder().put(SETTING_NUMBER_OF_SHARDS, randomIntBetween(1, 5)).build());
admin().indices().preparePutMapping(INDEX).setSource(mapping, XContentType.JSON).get();
indexDoc(INDEX, "doc_1", DOC_FIELD, "doc_1", TOPIC_FIELD, "technology", TEXT_FIELD, "term");
indexDoc(
@ -167,8 +166,8 @@ public class RRFRetrieverBuilderIT extends ESIntegTestCase {
QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_2", "doc_3", "doc_6")).boost(20L)
);
standard1.getPreFilterQueryBuilders().add(QueryBuilders.queryStringQuery("search").defaultField(TEXT_FIELD));
// this one retrieves docs 3, 2, 6, and 7
KnnRetrieverBuilder knnRetrieverBuilder = new KnnRetrieverBuilder(VECTOR_FIELD, new float[] { 4.0f }, null, 10, 100, null);
// this one retrieves docs 2, 3, 6, and 7
KnnRetrieverBuilder knnRetrieverBuilder = new KnnRetrieverBuilder(VECTOR_FIELD, new float[] { 2.0f }, null, 10, 100, null);
source.retriever(
new RRFRetrieverBuilder(
Arrays.asList(
@ -214,8 +213,8 @@ public class RRFRetrieverBuilderIT extends ESIntegTestCase {
QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_2", "doc_3", "doc_6")).boost(20L)
);
standard1.getPreFilterQueryBuilders().add(QueryBuilders.queryStringQuery("search").defaultField(TEXT_FIELD));
// this one retrieves docs 3, 2, 6, and 7
KnnRetrieverBuilder knnRetrieverBuilder = new KnnRetrieverBuilder(VECTOR_FIELD, new float[] { 4.0f }, null, 10, 100, null);
// this one retrieves docs 2, 3, 6, and 7
KnnRetrieverBuilder knnRetrieverBuilder = new KnnRetrieverBuilder(VECTOR_FIELD, new float[] { 2.0f }, null, 10, 100, null);
source.retriever(
new RRFRetrieverBuilder(
Arrays.asList(
@ -266,8 +265,8 @@ public class RRFRetrieverBuilderIT extends ESIntegTestCase {
QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_2", "doc_3", "doc_6")).boost(20L)
);
standard1.getPreFilterQueryBuilders().add(QueryBuilders.queryStringQuery("search").defaultField(TEXT_FIELD));
// this one retrieves docs 3, 2, 6, and 7
KnnRetrieverBuilder knnRetrieverBuilder = new KnnRetrieverBuilder(VECTOR_FIELD, new float[] { 4.0f }, null, 10, 100, null);
// this one retrieves docs 2, 3, 6, and 7
KnnRetrieverBuilder knnRetrieverBuilder = new KnnRetrieverBuilder(VECTOR_FIELD, new float[] { 2.0f }, null, 10, 100, null);
source.retriever(
new RRFRetrieverBuilder(
Arrays.asList(
@ -320,8 +319,8 @@ public class RRFRetrieverBuilderIT extends ESIntegTestCase {
QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_2", "doc_3", "doc_6")).boost(20L)
);
standard1.getPreFilterQueryBuilders().add(QueryBuilders.queryStringQuery("search").defaultField(TEXT_FIELD));
// this one retrieves docs 3, 2, 6, and 7
KnnRetrieverBuilder knnRetrieverBuilder = new KnnRetrieverBuilder(VECTOR_FIELD, new float[] { 4.0f }, null, 10, 100, null);
// this one retrieves docs 2, 3, 6, and 7
KnnRetrieverBuilder knnRetrieverBuilder = new KnnRetrieverBuilder(VECTOR_FIELD, new float[] { 2.0f }, null, 10, 100, null);
source.retriever(
new RRFRetrieverBuilder(
Arrays.asList(
@ -383,8 +382,8 @@ public class RRFRetrieverBuilderIT extends ESIntegTestCase {
QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_2", "doc_3", "doc_6")).boost(20L)
);
standard1.getPreFilterQueryBuilders().add(QueryBuilders.queryStringQuery("search").defaultField(TEXT_FIELD));
// this one retrieves docs 3, 2, 6, and 7
KnnRetrieverBuilder knnRetrieverBuilder = new KnnRetrieverBuilder(VECTOR_FIELD, new float[] { 4.0f }, null, 10, 100, null);
// this one retrieves docs 2, 3, 6, and 7
KnnRetrieverBuilder knnRetrieverBuilder = new KnnRetrieverBuilder(VECTOR_FIELD, new float[] { 2.0f }, null, 10, 100, null);
source.retriever(
new RRFRetrieverBuilder(
Arrays.asList(
@ -446,8 +445,8 @@ public class RRFRetrieverBuilderIT extends ESIntegTestCase {
QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_2", "doc_3", "doc_6")).boost(20L)
);
standard1.getPreFilterQueryBuilders().add(QueryBuilders.queryStringQuery("search").defaultField(TEXT_FIELD));
// this one retrieves docs 3, 2, 6, and 7
KnnRetrieverBuilder knnRetrieverBuilder = new KnnRetrieverBuilder(VECTOR_FIELD, new float[] { 4.0f }, null, 10, 100, null);
// this one retrieves docs 2, 3, 6, and 7
KnnRetrieverBuilder knnRetrieverBuilder = new KnnRetrieverBuilder(VECTOR_FIELD, new float[] { 2.0f }, null, 10, 100, null);
source.retriever(
new RRFRetrieverBuilder(
Arrays.asList(
@ -474,13 +473,12 @@ public class RRFRetrieverBuilderIT extends ESIntegTestCase {
assertThat(resp.getHits().getAt(0).getExplanation().getDetails().length, equalTo(2));
var rrfDetails = resp.getHits().getAt(0).getExplanation().getDetails()[0];
assertThat(rrfDetails.getDetails().length, equalTo(3));
assertThat(rrfDetails.getDescription(), containsString("computed for initial ranks [2, 1, 2]"));
assertThat(rrfDetails.getDescription(), containsString("computed for initial ranks [2, 1, 1]"));
assertThat(rrfDetails.getDetails()[0].getDescription(), containsString("for rank [2] in query at index [0]"));
assertThat(rrfDetails.getDetails()[0].getDescription(), containsString("for rank [2] in query at index [0]"));
assertThat(rrfDetails.getDetails()[0].getDescription(), containsString("[my_custom_retriever]"));
assertThat(rrfDetails.getDetails()[1].getDescription(), containsString("for rank [1] in query at index [1]"));
assertThat(rrfDetails.getDetails()[2].getDescription(), containsString("for rank [2] in query at index [2]"));
assertThat(rrfDetails.getDetails()[2].getDescription(), containsString("for rank [1] in query at index [2]"));
});
}
@ -503,8 +501,8 @@ public class RRFRetrieverBuilderIT extends ESIntegTestCase {
QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_2", "doc_3", "doc_6")).boost(20L)
);
standard1.getPreFilterQueryBuilders().add(QueryBuilders.queryStringQuery("search").defaultField(TEXT_FIELD));
// this one retrieves docs 3, 2, 6, and 7
KnnRetrieverBuilder knnRetrieverBuilder = new KnnRetrieverBuilder(VECTOR_FIELD, new float[] { 4.0f }, null, 10, 100, null);
// this one retrieves docs 2, 3, 6, and 7
KnnRetrieverBuilder knnRetrieverBuilder = new KnnRetrieverBuilder(VECTOR_FIELD, new float[] { 2.0f }, null, 10, 100, null);
RRFRetrieverBuilder nestedRRF = new RRFRetrieverBuilder(
Arrays.asList(

View file

@ -12,6 +12,7 @@ import org.apache.lucene.search.join.ScoreMode;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.retriever.CompoundRetrieverBuilder;
import org.elasticsearch.search.retriever.KnnRetrieverBuilder;
@ -21,8 +22,9 @@ import org.elasticsearch.xcontent.XContentType;
import java.util.Arrays;
import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS;
import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS;
import static org.hamcrest.Matchers.closeTo;
import static org.hamcrest.Matchers.containsInAnyOrder;
import static org.hamcrest.Matchers.equalTo;
public class RRFRetrieverBuilderNestedDocsIT extends RRFRetrieverBuilderIT {
@ -68,7 +70,7 @@ public class RRFRetrieverBuilderNestedDocsIT extends RRFRetrieverBuilderIT {
}
}
""";
createIndex(INDEX, Settings.builder().put(SETTING_NUMBER_OF_SHARDS, 1).put(SETTING_NUMBER_OF_REPLICAS, 0).build());
createIndex(INDEX, Settings.builder().put(SETTING_NUMBER_OF_SHARDS, randomIntBetween(1, 5)).build());
admin().indices().preparePutMapping(INDEX).setSource(mapping, XContentType.JSON).get();
indexDoc(INDEX, "doc_1", DOC_FIELD, "doc_1", TOPIC_FIELD, "technology", TEXT_FIELD, "term", LAST_30D_FIELD, 100);
indexDoc(
@ -134,9 +136,9 @@ public class RRFRetrieverBuilderNestedDocsIT extends RRFRetrieverBuilderIT {
final int rankWindowSize = 100;
final int rankConstant = 10;
SearchSourceBuilder source = new SearchSourceBuilder();
// this one retrieves docs 1, 4
// this one retrieves docs 1
StandardRetrieverBuilder standard0 = new StandardRetrieverBuilder(
QueryBuilders.nestedQuery("views", QueryBuilders.rangeQuery(LAST_30D_FIELD).gte(30L), ScoreMode.Avg)
QueryBuilders.nestedQuery("views", QueryBuilders.rangeQuery(LAST_30D_FIELD).gte(50L), ScoreMode.Avg)
);
// this one retrieves docs 2 and 6 due to prefilter
StandardRetrieverBuilder standard1 = new StandardRetrieverBuilder(
@ -157,16 +159,21 @@ public class RRFRetrieverBuilderNestedDocsIT extends RRFRetrieverBuilderIT {
)
);
source.fetchField(TOPIC_FIELD);
source.explain(true);
SearchRequestBuilder req = client().prepareSearch(INDEX).setSource(source);
ElasticsearchAssertions.assertResponse(req, resp -> {
assertNull(resp.pointInTimeId());
assertNotNull(resp.getHits().getTotalHits());
assertThat(resp.getHits().getTotalHits().value, equalTo(4L));
assertThat(resp.getHits().getTotalHits().value, equalTo(3L));
assertThat(resp.getHits().getTotalHits().relation, equalTo(TotalHits.Relation.EQUAL_TO));
assertThat(resp.getHits().getAt(0).getId(), equalTo("doc_6"));
assertThat(resp.getHits().getAt(1).getId(), equalTo("doc_1"));
assertThat(resp.getHits().getAt(2).getId(), equalTo("doc_2"));
assertThat(resp.getHits().getAt(3).getId(), equalTo("doc_4"));
assertThat((double) resp.getHits().getAt(0).getScore(), closeTo(0.1742, 1e-4));
assertThat(
Arrays.stream(resp.getHits().getHits()).skip(1).map(SearchHit::getId).toList(),
containsInAnyOrder("doc_1", "doc_2")
);
assertThat((double) resp.getHits().getAt(1).getScore(), closeTo(0.0909, 1e-4));
assertThat((double) resp.getHits().getAt(2).getScore(), closeTo(0.0909, 1e-4));
});
}
}

View file

@ -8,6 +8,7 @@
package org.elasticsearch.xpack.rank.rrf;
import org.apache.lucene.search.Explanation;
import org.elasticsearch.TransportVersion;
import org.elasticsearch.TransportVersions;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
@ -169,4 +170,9 @@ public final class RRFRankDoc extends RankDoc {
builder.field("scores", scores);
builder.field("rankConstant", rankConstant);
}
/**
 * Reports the minimal transport version for this rank doc.
 *
 * @return {@link TransportVersions#RRF_QUERY_REWRITE}
 */
@Override
public TransportVersion getMinimalSupportedVersion() {
return TransportVersions.RRF_QUERY_REWRITE;
}
}

View file

@ -25,7 +25,7 @@ public class RRFRankPlugin extends Plugin implements SearchPlugin {
public static final LicensedFeature.Momentary RANK_RRF_FEATURE = LicensedFeature.momentary(
null,
"rank-rrf",
License.OperationMode.PLATINUM
License.OperationMode.ENTERPRISE
);
public static final String NAME = "rrf";

View file

@ -180,10 +180,7 @@ public final class RRFRetrieverBuilder extends CompoundRetrieverBuilder<RRFRetri
builder.startArray(RETRIEVERS_FIELD.getPreferredName());
for (var entry : innerRetrievers) {
builder.startObject();
builder.field(entry.retriever().getName());
entry.retriever().toXContent(builder, params);
builder.endObject();
}
builder.endArray();
}

View file

@ -7,15 +7,17 @@
package org.elasticsearch.xpack.rank.rrf;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.io.stream.Writeable.Reader;
import org.elasticsearch.test.AbstractWireSerializingTestCase;
import org.elasticsearch.search.rank.AbstractRankDocWireSerializingTestCase;
import org.elasticsearch.test.ESTestCase;
import java.io.IOException;
import java.util.List;
import static org.elasticsearch.xpack.rank.rrf.RRFRankDoc.NO_RANK;
public class RRFRankDocTests extends AbstractWireSerializingTestCase<RRFRankDoc> {
public class RRFRankDocTests extends AbstractRankDocWireSerializingTestCase<RRFRankDoc> {
static RRFRankDoc createTestRRFRankDoc(int queryCount) {
RRFRankDoc instance = new RRFRankDoc(
@ -35,9 +37,13 @@ public class RRFRankDocTests extends AbstractWireSerializingTestCase<RRFRankDoc>
return instance;
}
static RRFRankDoc createTestRRFRankDoc() {
int queryCount = randomIntBetween(2, 20);
return createTestRRFRankDoc(queryCount);
/**
 * Returns the named writeable entries exposed by a freshly created {@link RRFRankPlugin}.
 *
 * @return the plugin's {@code getNamedWriteables()} result
 * @throws AssertionError if an {@link IOException} is raised in the try-with-resources block
 */
@Override
protected List<NamedWriteableRegistry.Entry> getAdditionalNamedWriteables() {
// try-with-resources: the temporary plugin instance is closed after its entries are read
try (RRFRankPlugin rrfRankPlugin = new RRFRankPlugin()) {
return rrfRankPlugin.getNamedWriteables();
} catch (IOException ex) {
// convert the checked exception into a test failure while keeping the original cause
throw new AssertionError("Failed to create RRFRankPlugin", ex);
}
}
@Override
@ -46,8 +52,9 @@ public class RRFRankDocTests extends AbstractWireSerializingTestCase<RRFRankDoc>
}
@Override
protected RRFRankDoc createTestInstance() {
return createTestRRFRankDoc();
protected RRFRankDoc createTestRankDoc() {
int queryCount = randomIntBetween(2, 20);
return createTestRRFRankDoc(queryCount);
}
@Override

View file

@ -8,19 +8,27 @@
package org.elasticsearch.xpack.rank.rrf;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.common.Strings;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.retriever.RetrieverBuilder;
import org.elasticsearch.search.retriever.RetrieverParserContext;
import org.elasticsearch.search.retriever.TestRetrieverBuilder;
import org.elasticsearch.test.AbstractXContentTestCase;
import org.elasticsearch.usage.SearchUsage;
import org.elasticsearch.usage.SearchUsageHolder;
import org.elasticsearch.usage.UsageService;
import org.elasticsearch.xcontent.NamedXContentRegistry;
import org.elasticsearch.xcontent.ParseField;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.xcontent.json.JsonXContent;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
public class RRFRetrieverBuilderParsingTests extends AbstractXContentTestCase<RRFRetrieverBuilder> {
/**
@ -53,7 +61,10 @@ public class RRFRetrieverBuilderParsingTests extends AbstractXContentTestCase<RR
@Override
protected RRFRetrieverBuilder doParseInstance(XContentParser parser) throws IOException {
return RRFRetrieverBuilder.PARSER.apply(parser, new RetrieverParserContext(new SearchUsage(), nf -> true));
return (RRFRetrieverBuilder) RetrieverBuilder.parseTopLevelRetrieverBuilder(
parser,
new RetrieverParserContext(new SearchUsage(), nf -> true)
);
}
@Override
@ -81,4 +92,48 @@ public class RRFRetrieverBuilderParsingTests extends AbstractXContentTestCase<RR
);
return new NamedXContentRegistry(entries);
}
/**
 * Parses an {@code rrf} retriever from a REST-style search body, checks the parsed
 * {@code min_score} and {@code _name}, then serializes the search source back to JSON and
 * verifies that re-parsing the serialized form yields an equal retriever.
 *
 * @throws IOException if creating a parser or parsing the content fails
 */
public void testRRFRetrieverParsing() throws IOException {
    String restContent = "{"
        + " \"retriever\": {"
        + " \"rrf\": {"
        + " \"retrievers\": ["
        + " {"
        + " \"test\": {"
        + " \"value\": \"foo\""
        + " }"
        + " },"
        + " {"
        + " \"test\": {"
        + " \"value\": \"bar\""
        + " }"
        + " }"
        + " ],"
        + " \"rank_window_size\": 100,"
        + " \"rank_constant\": 10,"
        + " \"min_score\": 20.0,"
        + " \"_name\": \"foo_rrf\""
        + " }"
        + " }"
        + "}";
    SearchUsageHolder searchUsageHolder = new UsageService().getSearchUsageHolder();
    try (XContentParser jsonParser = createParser(JsonXContent.jsonXContent, restContent)) {
        SearchSourceBuilder source = new SearchSourceBuilder().parseXContent(jsonParser, true, searchUsageHolder, nf -> true);
        assertThat(source.retriever(), instanceOf(RRFRetrieverBuilder.class));
        RRFRetrieverBuilder parsed = (RRFRetrieverBuilder) source.retriever();
        assertThat(parsed.minScore(), equalTo(20f));
        assertThat(parsed.retrieverName(), equalTo("foo_rrf"));
        // Round trip: serialize the parsed source and parse it again.
        try (XContentParser parseSerialized = createParser(JsonXContent.jsonXContent, Strings.toString(source))) {
            SearchSourceBuilder deserializedSource = new SearchSourceBuilder().parseXContent(
                parseSerialized,
                true,
                searchUsageHolder,
                nf -> true
            );
            assertThat(deserializedSource.retriever(), instanceOf(RRFRetrieverBuilder.class));
            // Read the retriever from the re-parsed source; taking it from `source` would
            // compare `parsed` with itself and make the round-trip check vacuous.
            RRFRetrieverBuilder deserialized = (RRFRetrieverBuilder) deserializedSource.retriever();
            assertThat(deserialized, equalTo(parsed));
        }
    }
}
}

View file

@ -23,7 +23,9 @@ public class RRFRankClientYamlTestSuiteIT extends ESClientYamlSuiteTestCase {
.nodes(2)
.module("rank-rrf")
.module("lang-painless")
.module("x-pack-inference")
.setting("xpack.license.self_generated.type", "trial")
.plugin("inference-service-test")
.build();
public RRFRankClientYamlTestSuiteIT(@Name("yaml") ClientYamlTestCandidate testCandidate) {

View file

@ -1,7 +1,6 @@
setup:
- skip:
features: close_to
- requires:
cluster_features: 'rrf_retriever_composition_supported'
reason: 'test requires rrf retriever composition support'
@ -10,8 +9,6 @@ setup:
indices.create:
index: test
body:
settings:
number_of_shards: 1
mappings:
properties:
text:
@ -42,7 +39,7 @@ setup:
index: test
id: "1"
body:
text: "term term term term term term term term term"
text: "term1"
vector: [1.0]
- do:
@ -50,7 +47,7 @@ setup:
index: test
id: "2"
body:
text: "term term term term term term term term"
text: "term2"
text_to_highlight: "search for the truth"
keyword: "biology"
vector: [2.0]
@ -60,8 +57,8 @@ setup:
index: test
id: "3"
body:
text: "term term term term term term term"
text_to_highlight: "nothing related but still a match"
text: "term3"
text_to_highlight: "nothing related"
keyword: "technology"
vector: [3.0]
@ -70,14 +67,14 @@ setup:
index: test
id: "4"
body:
text: "term term term term term term"
text: "term4"
vector: [4.0]
- do:
index:
index: test
id: "5"
body:
text: "term term term term term"
text: "term5"
text_to_highlight: "You know, for Search!"
keyword: "technology"
integer: 5
@ -87,7 +84,7 @@ setup:
index: test
id: "6"
body:
text: "term term term term"
text: "term6"
keyword: "biology"
integer: 6
vector: [6.0]
@ -96,27 +93,26 @@ setup:
index: test
id: "7"
body:
text: "term term term"
text: "term7"
keyword: "astronomy"
vector: [7.0]
vector: [77.0]
nested: { views: 50}
- do:
index:
index: test
id: "8"
body:
text: "term term"
text: "term8"
keyword: "technology"
vector: [8.0]
nested: { views: 100}
- do:
index:
index: test
id: "9"
body:
text: "term"
text: "term9"
integer: 2
keyword: "technology"
vector: [9.0]
nested: { views: 10}
- do:
indices.refresh: {}
@ -133,6 +129,7 @@ setup:
rrf:
retrievers: [
{
# this one retrieves docs 6, 5, 4
knn: {
field: vector,
query_vector: [ 6.0 ],
@ -141,10 +138,72 @@ setup:
}
},
{
# this one retrieves docs 4, 5, 1, 2, 6
standard: {
query: {
term: {
text: term
bool: {
should: [
{
constant_score: {
filter: {
term: {
text: term4
}
},
boost: 10.0
}
},
{
constant_score: {
filter: {
term: {
text: term5
}
},
boost: 9.0
}
},
{
constant_score: {
filter: {
term: {
text: term1
}
},
boost: 8.0
}
},
{
constant_score: {
filter: {
term: {
text: term2
}
},
boost: 7.0
}
},
{
constant_score: {
filter: {
term: {
text: term6
}
},
boost: 6.0
}
},
{
constant_score: {
filter: {
exists: {
field: text
}
},
boost: 1
}
}
]
}
}
}
@ -158,9 +217,13 @@ setup:
terms:
field: keyword
- match: { hits.hits.0._id: "5" }
- match: { hits.hits.1._id: "1" }
- match: { hits.hits.0._id: "4" }
- close_to: { hits.hits.0._score: { value: 0.1678, error: 0.001 } }
- match: { hits.hits.1._id: "5" }
- close_to: { hits.hits.1._score: { value: 0.1666, error: 0.001 } }
- match: { hits.hits.2._id: "6" }
- close_to: { hits.hits.2._score: { value: 0.1575, error: 0.001 } }
- match: { aggregations.keyword_aggs.buckets.0.key: "technology" }
- match: { aggregations.keyword_aggs.buckets.0.doc_count: 4 }
@ -181,6 +244,7 @@ setup:
rrf:
retrievers: [
{
# this one retrieves docs 6, 5, 4
knn: {
field: vector,
query_vector: [ 6.0 ],
@ -189,10 +253,72 @@ setup:
}
},
{
# this one retrieves docs 4, 5, 1, 2, 6
standard: {
query: {
term: {
text: term
bool: {
should: [
{
constant_score: {
filter: {
term: {
text: term4
}
},
boost: 10.0
}
},
{
constant_score: {
filter: {
term: {
text: term5
}
},
boost: 9.0
}
},
{
constant_score: {
filter: {
term: {
text: term1
}
},
boost: 8.0
}
},
{
constant_score: {
filter: {
term: {
text: term2
}
},
boost: 7.0
}
},
{
constant_score: {
filter: {
term: {
text: term6
}
},
boost: 6.0
}
},
{
constant_score: {
filter: {
exists: {
field: text
}
},
boost: 1
}
}
]
}
}
}
@ -208,12 +334,14 @@ setup:
lang: painless
source: "_score"
- match: { hits.hits.0._id: "5" }
- match: { hits.hits.1._id: "1" }
- match: { hits.hits.0._id: "4" }
- close_to: { hits.hits.0._score: { value: 0.1678, error: 0.001 } }
- match: { hits.hits.1._id: "5" }
- close_to: { hits.hits.1._score: { value: 0.1666, error: 0.001 } }
- match: { hits.hits.2._id: "6" }
- close_to: { hits.hits.2._score: { value: 0.1575, error: 0.001 } }
- close_to: { aggregations.max_score.value: { value: 0.15, error: 0.001 }}
- close_to: { aggregations.max_score.value: { value: 0.1678, error: 0.001 }}
---
"rrf retriever with top-level collapse":
@ -228,6 +356,7 @@ setup:
rrf:
retrievers: [
{
# this one retrieves docs 6, 5, 4
knn: {
field: vector,
query_vector: [ 6.0 ],
@ -236,10 +365,72 @@ setup:
}
},
{
# this one retrieves docs 4, 5, 1, 2, 6
standard: {
query: {
term: {
text: term
bool: {
should: [
{
constant_score: {
filter: {
term: {
text: term4
}
},
boost: 10.0
}
},
{
constant_score: {
filter: {
term: {
text: term5
}
},
boost: 9.0
}
},
{
constant_score: {
filter: {
term: {
text: term1
}
},
boost: 8.0
}
},
{
constant_score: {
filter: {
term: {
text: term2
}
},
boost: 7.0
}
},
{
constant_score: {
filter: {
term: {
text: term6
}
},
boost: 6.0
}
},
{
constant_score: {
filter: {
exists: {
field: text
}
},
boost: 1
}
}
]
}
}
}
@ -250,18 +441,23 @@ setup:
size: 3
collapse: { field: keyword, inner_hits: { name: sub_hits, size: 2 } }
- match: { hits.hits.0._id: "5" }
- match: { hits.hits.1._id: "1" }
- match: { hits.total : 9 }
- match: { hits.hits.0._id: "4" }
- close_to: { hits.hits.0._score: { value: 0.1678, error: 0.001 } }
- match: { hits.hits.1._id: "5" }
- close_to: { hits.hits.1._score: { value: 0.1666, error: 0.001 } }
- match: { hits.hits.2._id: "6" }
- close_to: { hits.hits.2._score: { value: 0.1575, error: 0.001 } }
- match: { hits.hits.0.inner_hits.sub_hits.hits.total : 4 }
- length: { hits.hits.0.inner_hits.sub_hits.hits.hits : 2 }
- match: { hits.hits.0.inner_hits.sub_hits.hits.hits.0._id: "5" }
- match: { hits.hits.0.inner_hits.sub_hits.hits.hits.1._id: "3" }
- match: { hits.hits.0.inner_hits.sub_hits.hits.hits.0._id: "4" }
- match: { hits.hits.0.inner_hits.sub_hits.hits.hits.1._id: "1" }
- match: { hits.hits.1.inner_hits.sub_hits.hits.total : 4 }
- length: { hits.hits.1.inner_hits.sub_hits.hits.hits : 2 }
- match: { hits.hits.1.inner_hits.sub_hits.hits.hits.0._id: "1" }
- match: { hits.hits.1.inner_hits.sub_hits.hits.hits.1._id: "4" }
- match: { hits.hits.1.inner_hits.sub_hits.hits.hits.0._id: "5" }
- match: { hits.hits.1.inner_hits.sub_hits.hits.hits.1._id: "3" }
- length: { hits.hits.2.inner_hits.sub_hits.hits.hits: 2 }
- match: { hits.hits.2.inner_hits.sub_hits.hits.hits.0._id: "6" }
@ -280,18 +476,132 @@ setup:
rrf:
retrievers: [
{
knn: {
field: vector,
query_vector: [ 6.0 ],
k: 3,
num_candidates: 10
# this one retrieves docs 7, 3
standard: {
query: {
bool: {
should: [
{
constant_score: {
filter: {
term: {
text: term7
}
},
boost: 10.0
}
},
{
constant_score: {
filter: {
term: {
text: term3
} },
boost: 9.0
}
}
]
}
}
}
},
{
# this one retrieves docs 1, 2, 3, 7
standard: {
query: {
term: {
text: term
bool: {
should: [
{
constant_score: {
filter: {
term: {
text: term1
}
},
boost: 10.0
}
},
{
constant_score: {
filter: {
term: {
text: term2
}
},
boost: 9.0
}
},
{
constant_score: {
filter: {
term: {
text: term3
}
},
boost: 8.0
}
},
{
constant_score: {
filter: {
term: {
text: term4
}
},
boost: 7.0
}
},
{
constant_score: {
filter: {
term: {
text: term5
}
},
boost: 6.0
}
},
{
constant_score: {
filter: {
term: {
text: term6
}
},
boost: 5.0
}
},
{
constant_score: {
filter: {
term: {
text: term7
}
},
boost: 4.0
}
},
{
constant_score: {
filter: {
term: {
text: term8
}
},
boost: 3.0
}
},
{
constant_score: {
filter: {
term: {
text: term9
}
},
boost: 2.0
}
}
]
}
},
collapse: { field: keyword, inner_hits: { name: sub_hits, size: 1 } }
@ -303,8 +613,9 @@ setup:
size: 3
- match: { hits.hits.0._id: "7" }
- match: { hits.hits.1._id: "1" }
- match: { hits.hits.2._id: "6" }
- close_to: { hits.hits.0._score: { value: 0.1623, error: 0.001 } }
- match: { hits.hits.1._id: "3" }
- close_to: { hits.hits.1._score: { value: 0.1602, error: 0.001 } }
---
"rrf retriever highlighting results":
@ -331,7 +642,7 @@ setup:
standard: {
query: {
term: {
keyword: technology
text: term5
}
}
}
@ -349,7 +660,7 @@ setup:
}
}
- match: { hits.total : 5 }
- match: { hits.total : 2 }
- match: { hits.hits.0._id: "5" }
- match: { hits.hits.0.highlight.text_to_highlight.0: "You know, for <em>Search</em>!" }
@ -357,9 +668,6 @@ setup:
- match: { hits.hits.1._id: "2" }
- match: { hits.hits.1.highlight.text_to_highlight.0: "<em>search</em> for the truth" }
- match: { hits.hits.2._id: "3" }
- not_exists: hits.hits.2.highlight
---
"rrf retriever with custom nested sort":
@ -374,12 +682,103 @@ setup:
retrievers: [
{
# this one retrieves docs 1, 2, 3, .., 9
# but due to sorting, it will revert the order to 6, 5, .., 9 which due to
# but due to sorting, it will revert the order to 6, 5, 9, ... which due to
# rank_window_size: 2 will only return 6 and 5
standard: {
query: {
term: {
text: term
bool: {
should: [
{
constant_score: {
filter: {
term: {
text: term1
}
},
boost: 10.0
}
},
{
constant_score: {
filter: {
term: {
text: term2
}
},
boost: 9.0
}
},
{
constant_score: {
filter: {
term: {
text: term3
}
},
boost: 8.0
}
},
{
constant_score: {
filter: {
term: {
text: term4
}
},
boost: 7.0
}
},
{
constant_score: {
filter: {
term: {
text: term5
}
},
boost: 6.0
}
},
{
constant_score: {
filter: {
term: {
text: term6
}
},
boost: 5.0
}
},
{
constant_score: {
filter: {
term: {
text: term7
}
},
boost: 4.0
}
},
{
constant_score: {
filter: {
term: {
text: term8
}
},
boost: 3.0
}
},
{
constant_score: {
filter: {
term: {
text: term9
}
},
boost: 2.0
}
}
]
}
},
sort: [
@ -410,7 +809,6 @@ setup:
- length: {hits.hits: 2 }
- match: { hits.hits.0._id: "6" }
- match: { hits.hits.1._id: "2" }
---
"rrf retriever with nested query":
@ -427,7 +825,7 @@ setup:
{
knn: {
field: vector,
query_vector: [ 7.0 ],
query_vector: [ 77.0 ],
k: 1,
num_candidates: 3
}

View file

@ -0,0 +1,334 @@
setup:
- requires:
cluster_features: ['rrf_retriever_composition_supported', 'text_similarity_reranker_retriever_supported']
reason: need to have support for rrf and semantic reranking composition
test_runner_features: "close_to"
- do:
inference.put:
task_type: rerank
inference_id: my-rerank-model
body: >
{
"service": "test_reranking_service",
"service_settings": {
"model_id": "my_model",
"api_key": "abc64"
},
"task_settings": {
}
}
- do:
indices.create:
index: test-index
body:
settings:
number_of_shards: 1
mappings:
properties:
text:
type: text
topic:
type: keyword
subtopic:
type: keyword
integer:
type: integer
- do:
index:
index: test-index
id: doc_1
body:
text: "Sun Moon Lake is a lake in Nantou County, Taiwan. It is the largest lake in Taiwan."
topic: [ "geography" ]
integer: 1
- do:
index:
index: test-index
id: doc_2
body:
text: "The phases of the Moon come from the position of the Moon relative to the Earth and Sun."
topic: [ "science" ]
subtopic: [ "astronomy" ]
integer: 2
- do:
index:
index: test-index
id: doc_3
body:
text: "As seen from Earth, a solar eclipse happens when the Moon is directly between the Earth and the Sun."
topic: [ "science" ]
subtopic: [ "technology" ]
integer: 3
- do:
indices.refresh: {}
---
"rrf retriever with a nested text similarity reranker":
- do:
search:
index: test-index
body:
track_total_hits: true
fields: [ "text", "topic" ]
retriever:
rrf: {
retrievers:
[
{
standard: {
query: {
bool: {
should:
[
{
constant_score: {
filter: {
term: {
integer: 1
}
},
boost: 10
}
},
{
constant_score:
{
filter:
{
term:
{
integer: 2
}
},
boost: 1
}
}
]
}
}
}
},
{
text_similarity_reranker: {
retriever:
{
standard: {
query: {
term: {
topic: "science"
}
}
}
},
rank_window_size: 10,
inference_id: my-rerank-model,
inference_text: "How often does the moon hide the sun?",
field: text
}
}
],
rank_window_size: 10,
rank_constant: 1
}
size: 10
from: 1
aggs:
topics:
terms:
field: topic
size: 10
- match: { hits.total.value: 3 }
- length: { hits.hits: 2 }
- match: { hits.hits.0._id: "doc_1" }
- match: { hits.hits.1._id: "doc_3" }
- match: { aggregations.topics.buckets.0.key: "science" }
- match: { aggregations.topics.buckets.0.doc_count: 2 }
- match: { aggregations.topics.buckets.1.key: "geography" }
- match: { aggregations.topics.buckets.1.doc_count: 1 }
---
"Text similarity reranker on top of an RRF retriever":
- do:
search:
index: test-index
body:
track_total_hits: true
fields: [ "text", "topic" ]
retriever:
{
text_similarity_reranker: {
retriever:
{
rrf: {
retrievers:
[
{
standard: {
query: {
bool: {
should:
[
{
constant_score: {
filter: {
term: {
integer: 1
}
},
boost: 10
}
},
{
constant_score:
{
filter:
{
term:
{
integer: 3
}
},
boost: 1
}
}
]
}
}
}
},
{
standard: {
query: {
term: {
topic: "geography"
}
}
}
}
],
rank_window_size: 10,
rank_constant: 1
}
},
rank_window_size: 10,
inference_id: my-rerank-model,
inference_text: "How often does the moon hide the sun?",
field: text
}
}
size: 10
aggs:
topics:
terms:
field: topic
size: 10
- match: { hits.total.value: 2 }
- length: { hits.hits: 2 }
- match: { hits.hits.0._id: "doc_3" }
- match: { hits.hits.1._id: "doc_1" }
- match: { aggregations.topics.buckets.0.key: "geography" }
- match: { aggregations.topics.buckets.0.doc_count: 1 }
- match: { aggregations.topics.buckets.1.key: "science" }
- match: { aggregations.topics.buckets.1.doc_count: 1 }
---
"explain using rrf retriever and text-similarity":
- do:
search:
index: test-index
body:
track_total_hits: true
fields: [ "text", "topic" ]
retriever:
rrf: {
retrievers:
[
{
standard: {
query: {
bool: {
should:
[
{
constant_score: {
filter: {
term: {
integer: 1
}
},
boost: 10
}
},
{
constant_score:
{
filter:
{
term:
{
integer: 2
}
},
boost: 1
}
}
]
}
}
}
},
{
text_similarity_reranker: {
retriever:
{
standard: {
query: {
term: {
topic: "science"
}
}
}
},
rank_window_size: 10,
inference_id: my-rerank-model,
inference_text: "How often does the moon hide the sun?",
field: text
}
}
],
rank_window_size: 10,
rank_constant: 1
}
size: 10
explain: true
- match: { hits.hits.0._id: "doc_2" }
- match: { hits.hits.1._id: "doc_1" }
- match: { hits.hits.2._id: "doc_3" }
- close_to: { hits.hits.0._explanation.value: { value: 0.6666667, error: 0.000001 } }
- match: {hits.hits.0._explanation.description: "/rrf.score:.\\[0.6666667\\].*/" }
- match: {hits.hits.0._explanation.details.0.value: 2}
- match: {hits.hits.0._explanation.details.0.description: "/rrf.score:.\\[0.33333334\\].*/" }
- match: {hits.hits.0._explanation.details.0.details.0.details.0.description: "/ConstantScore.*/" }
- match: {hits.hits.0._explanation.details.1.value: 2}
- match: {hits.hits.0._explanation.details.1.description: "/rrf.score:.\\[0.33333334\\].*/" }
- match: {hits.hits.0._explanation.details.1.details.0.description: "/text_similarity_reranker.match.using.inference.endpoint:.\\[my-rerank-model\\].on.document.field:.\\[text\\].*/" }
- match: {hits.hits.0._explanation.details.1.details.0.details.0.description: "/weight.*science.*/" }