Update test_reranking_service to try and parse provided inputs as scores (#122328)

This commit is contained in:
Panagiotis Bailis 2025-02-14 12:30:10 +02:00 committed by GitHub
parent 37f974546d
commit 95f8454e40
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 156 additions and 55 deletions

View file

@ -36,6 +36,7 @@ import org.elasticsearch.xpack.core.inference.results.RankedDocsResults;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Comparator;
import java.util.EnumSet; import java.util.EnumSet;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
@ -148,17 +149,29 @@ public class TestRerankingServiceExtension implements InferenceServiceExtension
} }
private RankedDocsResults makeResults(List<String> input) { private RankedDocsResults makeResults(List<String> input) {
List<RankedDocsResults.RankedDoc> results = new ArrayList<>();
int totalResults = input.size(); int totalResults = input.size();
try {
List<RankedDocsResults.RankedDoc> results = new ArrayList<>();
for (int i = 0; i < totalResults; i++) {
results.add(new RankedDocsResults.RankedDoc(i, Float.parseFloat(input.get(i)), input.get(i)));
}
return new RankedDocsResults(results.stream().sorted(Comparator.reverseOrder()).toList());
} catch (NumberFormatException ex) {
List<RankedDocsResults.RankedDoc> results = new ArrayList<>();
float minScore = random.nextFloat(-1f, 1f); float minScore = random.nextFloat(-1f, 1f);
float resultDiff = 0.2f; float resultDiff = 0.2f;
for (int i = 0; i < input.size(); i++) { for (int i = 0; i < input.size(); i++) {
results.add( results.add(
new RankedDocsResults.RankedDoc(totalResults - 1 - i, minScore + resultDiff * (totalResults - i), input.get(i)) new RankedDocsResults.RankedDoc(
totalResults - 1 - i,
minScore + resultDiff * (totalResults - i),
input.get(totalResults - 1 - i)
)
); );
} }
return new RankedDocsResults(results); return new RankedDocsResults(results);
} }
}
protected ServiceSettings getServiceSettingsFromMap(Map<String, Object> serviceSettingsMap) { protected ServiceSettings getServiceSettingsFromMap(Map<String, Object> serviceSettingsMap) {
return TestServiceSettings.fromMap(serviceSettingsMap); return TestServiceSettings.fromMap(serviceSettingsMap);

View file

@ -27,6 +27,9 @@ public class InferenceFeatures implements FeatureSpecification {
private static final NodeFeature SEMANTIC_TEXT_HIGHLIGHTER = new NodeFeature("semantic_text.highlighter"); private static final NodeFeature SEMANTIC_TEXT_HIGHLIGHTER = new NodeFeature("semantic_text.highlighter");
private static final NodeFeature SEMANTIC_TEXT_HIGHLIGHTER_DEFAULT = new NodeFeature("semantic_text.highlighter.default"); private static final NodeFeature SEMANTIC_TEXT_HIGHLIGHTER_DEFAULT = new NodeFeature("semantic_text.highlighter.default");
private static final NodeFeature TEST_RERANKING_SERVICE_PARSE_TEXT_AS_SCORE = new NodeFeature(
"test_reranking_service.parse_text_as_score"
);
@Override @Override
public Set<NodeFeature> getTestFeatures() { public Set<NodeFeature> getTestFeatures() {
@ -45,7 +48,8 @@ public class InferenceFeatures implements FeatureSpecification {
TextSimilarityRankRetrieverBuilder.TEXT_SIMILARITY_RERANKER_ALIAS_HANDLING_FIX, TextSimilarityRankRetrieverBuilder.TEXT_SIMILARITY_RERANKER_ALIAS_HANDLING_FIX,
SemanticInferenceMetadataFieldsMapper.INFERENCE_METADATA_FIELDS_ENABLED_BY_DEFAULT, SemanticInferenceMetadataFieldsMapper.INFERENCE_METADATA_FIELDS_ENABLED_BY_DEFAULT,
SEMANTIC_TEXT_HIGHLIGHTER_DEFAULT, SEMANTIC_TEXT_HIGHLIGHTER_DEFAULT,
SEMANTIC_KNN_FILTER_FIX SEMANTIC_KNN_FILTER_FIX,
TEST_RERANKING_SERVICE_PARSE_TEXT_AS_SCORE
); );
} }
} }

View file

@ -2,7 +2,6 @@ setup:
- skip: - skip:
features: features:
- close_to - close_to
- contains
- requires: - requires:
test_runner_features: "close_to" test_runner_features: "close_to"
@ -33,16 +32,8 @@ setup:
type: keyword type: keyword
subtopic: subtopic:
type: keyword type: keyword
inference_text_field:
- do: type: text
index:
index: test-index
id: doc_1
body:
text: "As seen from Earth, a solar eclipse happens when the Moon is directly between the Earth and the Sun."
topic: [ "science" ]
subtopic: [ "technology" ]
refresh: true
- do: - do:
index: index:
@ -52,6 +43,7 @@ setup:
text: "The phases of the Moon come from the position of the Moon relative to the Earth and Sun." text: "The phases of the Moon come from the position of the Moon relative to the Earth and Sun."
topic: [ "science" ] topic: [ "science" ]
subtopic: [ "astronomy" ] subtopic: [ "astronomy" ]
inference_text_field: "0"
refresh: true refresh: true
- do: - do:
@ -61,11 +53,27 @@ setup:
body: body:
text: "Sun Moon Lake is a lake in Nantou County, Taiwan. It is the largest lake in Taiwan." text: "Sun Moon Lake is a lake in Nantou County, Taiwan. It is the largest lake in Taiwan."
topic: [ "geography" ] topic: [ "geography" ]
inference_text_field: "1"
refresh: true
- do:
index:
index: test-index
id: doc_1
body:
text: "As seen from Earth, a solar eclipse happens when the Moon is directly between the Earth and the Sun."
topic: [ "science" ]
subtopic: [ "technology" ]
inference_text_field: "-1"
refresh: true refresh: true
--- ---
"Simple text similarity rank retriever": "Simple text similarity rank retriever":
- requires:
cluster_features: "test_reranking_service.parse_text_as_score"
reason: test_reranking_service can now parse provided input as score to provide deterministic ranks
- do: - do:
search: search:
index: test-index index: test-index
@ -75,14 +83,37 @@ setup:
retriever: retriever:
text_similarity_reranker: text_similarity_reranker:
retriever: retriever:
# this one returns docs 1 and 2
standard: standard:
query: query:
term: bool: {
topic: "science" should: [
{
constant_score: {
filter: {
term: {
subtopic: "technology"
}
},
boost: 10
}
},
{
constant_score: {
filter: {
term: {
subtopic: "astronomy"
}
},
boost: 1
}
}
]
}
rank_window_size: 10 rank_window_size: 10
inference_id: my-rerank-model inference_id: my-rerank-model
inference_text: "How often does the moon hide the sun?" inference_text: "How often does the moon hide the sun?"
field: text field: inference_text_field
size: 10 size: 10
- match: { hits.total.value: 2 } - match: { hits.total.value: 2 }
@ -94,6 +125,10 @@ setup:
--- ---
"Simple text similarity rank retriever and filtering": "Simple text similarity rank retriever and filtering":
- requires:
cluster_features: "test_reranking_service.parse_text_as_score"
reason: test_reranking_service can now parse provided input as score to provide deterministic ranks
- do: - do:
search: search:
index: test-index index: test-index
@ -103,6 +138,7 @@ setup:
retriever: retriever:
text_similarity_reranker: text_similarity_reranker:
retriever: retriever:
# this one returns doc 1
standard: standard:
query: query:
term: term:
@ -113,7 +149,7 @@ setup:
rank_window_size: 10 rank_window_size: 10
inference_id: my-rerank-model inference_id: my-rerank-model
inference_text: "How often does the moon hide the sun?" inference_text: "How often does the moon hide the sun?"
field: text field: inference_text_field
size: 10 size: 10
- match: { hits.total.value: 1 } - match: { hits.total.value: 1 }
@ -143,7 +179,7 @@ setup:
rank_window_size: 10 rank_window_size: 10
inference_id: i-dont-exist inference_id: i-dont-exist
inference_text: "How often does the moon hide the sun?" inference_text: "How often does the moon hide the sun?"
field: text field: inference_text_field
size: 10 size: 10
--- ---
@ -169,13 +205,17 @@ setup:
rank_window_size: 10 rank_window_size: 10
inference_id: i-dont-exist inference_id: i-dont-exist
inference_text: "asdfasdf" inference_text: "asdfasdf"
field: text field: inference_text_field
size: 10 size: 10
--- ---
"text similarity reranking with explain": "text similarity reranking with explain":
- requires:
cluster_features: "test_reranking_service.parse_text_as_score"
reason: test_reranking_service can now parse provided input as score to provide deterministic ranks
- do: - do:
search: search:
index: test-index index: test-index
@ -186,10 +226,32 @@ setup:
text_similarity_reranker: { text_similarity_reranker: {
retriever: retriever:
{ {
# this one returns docs 1 and 2
standard: { standard: {
query: { query: {
bool: {
should: [
{
constant_score: {
filter: {
term: { term: {
topic: "science" subtopic: "technology"
}
},
boost: 10
}
},
{
constant_score: {
filter: {
term: {
subtopic: "astronomy"
}
},
boost: 1
}
}
]
} }
} }
} }
@ -197,17 +259,17 @@ setup:
rank_window_size: 10, rank_window_size: 10,
inference_id: my-rerank-model, inference_id: my-rerank-model,
inference_text: "How often does the moon hide the sun?", inference_text: "How often does the moon hide the sun?",
field: text field: inference_text_field
} }
} }
size: 10 size: 10
explain: true explain: true
- contains: { hits.hits: { _id: "doc_2" } } - match: { hits.hits.0._id: "doc_2" }
- contains: { hits.hits: { _id: "doc_1" } } - match: { hits.hits.1._id: "doc_1" }
- match: {hits.hits.0._explanation.description: "/text_similarity_reranker.match.using.inference.endpoint:.\\[my-rerank-model\\].on.document.field:.\\[text\\].*/" } - match: {hits.hits.0._explanation.description: "/text_similarity_reranker.match.using.inference.endpoint:.\\[my-rerank-model\\].on.document.field:.\\[inference_text_field\\].*/" }
- match: {hits.hits.0._explanation.details.0.description: "/weight.*science.*/" } - match: {hits.hits.0._explanation.details.0.details.0.description: "/subtopic.*astronomy.*/" }
--- ---
"text similarity reranker properly handles aliases": "text similarity reranker properly handles aliases":
@ -281,7 +343,7 @@ setup:
rank_window_size: 10 rank_window_size: 10
inference_id: my-rerank-model inference_id: my-rerank-model
inference_text: "How often does the moon hide the sun?" inference_text: "How often does the moon hide the sun?"
field: text field: inference_text_field
size: 10 size: 10
- match: { hits.total.value: 1 } - match: { hits.total.value: 1 }

View file

@ -39,15 +39,9 @@ setup:
type: keyword type: keyword
integer: integer:
type: integer type: integer
inference_text_field:
type: text
- do:
index:
index: test-index
id: doc_1
body:
text: "Sun Moon Lake is a lake in Nantou County, Taiwan. It is the largest lake in Taiwan."
topic: [ "geography" ]
integer: 1
- do: - do:
index: index:
@ -58,6 +52,7 @@ setup:
topic: [ "science" ] topic: [ "science" ]
subtopic: [ "astronomy" ] subtopic: [ "astronomy" ]
integer: 2 integer: 2
inference_text_field: "0"
- do: - do:
index: index:
@ -68,6 +63,17 @@ setup:
topic: [ "science" ] topic: [ "science" ]
subtopic: [ "technology" ] subtopic: [ "technology" ]
integer: 3 integer: 3
inference_text_field: "1"
- do:
index:
index: test-index
id: doc_1
body:
text: "Sun Moon Lake is a lake in Nantou County, Taiwan. It is the largest lake in Taiwan."
topic: [ "geography" ]
integer: 1
inference_text_field: "-1"
- do: - do:
indices.refresh: {} indices.refresh: {}
@ -75,6 +81,10 @@ setup:
--- ---
"rrf retriever with a nested text similarity reranker": "rrf retriever with a nested text similarity reranker":
- requires:
cluster_features: "test_reranking_service.parse_text_as_score"
reason: test_reranking_service can now parse provided input as score to provide deterministic ranks
- do: - do:
search: search:
index: test-index index: test-index
@ -86,6 +96,7 @@ setup:
retrievers: retrievers:
[ [
{ {
# this one retrieves docs 1 and 2
standard: { standard: {
query: { query: {
bool: { bool: {
@ -120,13 +131,14 @@ setup:
} }
}, },
{ {
# this one retrieves doc 2
text_similarity_reranker: { text_similarity_reranker: {
retriever: retriever:
{ {
standard: { standard: {
query: { query: {
term: { term: {
topic: "science" subtopic: "astronomy"
} }
} }
} }
@ -134,7 +146,7 @@ setup:
rank_window_size: 10, rank_window_size: 10,
inference_id: my-rerank-model, inference_id: my-rerank-model,
inference_text: "How often does the moon hide the sun?", inference_text: "How often does the moon hide the sun?",
field: text field: inference_text_field
} }
} }
], ],
@ -149,20 +161,21 @@ setup:
field: topic field: topic
size: 10 size: 10
- match: { hits.total.value: 3 } - match: { hits.total.value: 2 }
- length: { hits.hits: 2 } - length: { hits.hits: 1 }
- contains: { hits.hits: { _id: "doc_1" } } - match: { hits.hits.0._id: "doc_1" }
- contains: { hits.hits: { _id: "doc_3" } }
- match: { aggregations.topics.buckets.0.key: "science" } - contains: { aggregations.topics.buckets: { key: "geography", doc_count: 1 } }
- match: { aggregations.topics.buckets.0.doc_count: 2 } - contains: { aggregations.topics.buckets: { key: "science", doc_count: 1 } }
- match: { aggregations.topics.buckets.1.key: "geography" }
- match: { aggregations.topics.buckets.1.doc_count: 1 }
--- ---
"Text similarity reranker on top of an RRF retriever": "Text similarity reranker on top of an RRF retriever":
- requires:
cluster_features: "test_reranking_service.parse_text_as_score"
reason: test_reranking_service can now parse provided input as score to provide deterministic ranks
- do: - do:
search: search:
index: test-index index: test-index
@ -174,10 +187,12 @@ setup:
text_similarity_reranker: { text_similarity_reranker: {
retriever: retriever:
{ {
# this one retrieves docs 1 and 3, so the final ranking should be 3 and then 1, based on the scores from the reranking service
rrf: { rrf: {
retrievers: retrievers:
[ [
{ {
# this one retrieves docs 1 and 3
standard: { standard: {
query: { query: {
bool: { bool: {
@ -212,6 +227,7 @@ setup:
} }
}, },
{ {
# this one retrieves doc 1
standard: { standard: {
query: { query: {
term: { term: {
@ -228,7 +244,7 @@ setup:
rank_window_size: 10, rank_window_size: 10,
inference_id: my-rerank-model, inference_id: my-rerank-model,
inference_text: "How often does the moon hide the sun?", inference_text: "How often does the moon hide the sun?",
field: text field: inference_text_field
} }
} }
size: 10 size: 10
@ -253,6 +269,10 @@ setup:
--- ---
"explain using rrf retriever and text-similarity": "explain using rrf retriever and text-similarity":
- requires:
cluster_features: "test_reranking_service.parse_text_as_score"
reason: test_reranking_service can now parse provided input as score to provide deterministic ranks
- do: - do:
search: search:
index: test-index index: test-index
@ -264,6 +284,7 @@ setup:
retrievers: retrievers:
[ [
{ {
# this one retrieves docs 1 and 2
standard: { standard: {
query: { query: {
bool: { bool: {
@ -298,6 +319,7 @@ setup:
} }
}, },
{ {
# this one retrieves doc 2
text_similarity_reranker: { text_similarity_reranker: {
retriever: retriever:
{ {
@ -312,7 +334,7 @@ setup:
rank_window_size: 10, rank_window_size: 10,
inference_id: my-rerank-model, inference_id: my-rerank-model,
inference_text: "How often does the moon hide the sun?", inference_text: "How often does the moon hide the sun?",
field: text field: inference_text_field
} }
} }
], ],
@ -332,5 +354,5 @@ setup:
- match: {hits.hits.0._explanation.details.0.details.0.details.0.description: "/ConstantScore.*/" } - match: {hits.hits.0._explanation.details.0.details.0.details.0.description: "/ConstantScore.*/" }
- match: {hits.hits.0._explanation.details.1.value: 1} - match: {hits.hits.0._explanation.details.1.value: 1}
- match: {hits.hits.0._explanation.details.1.description: "/rrf.score:.\\[0.5\\].*/" } - match: {hits.hits.0._explanation.details.1.description: "/rrf.score:.\\[0.5\\].*/" }
- match: {hits.hits.0._explanation.details.1.details.0.description: "/text_similarity_reranker.match.using.inference.endpoint:.\\[my-rerank-model\\].on.document.field:.\\[text\\].*/" } - match: {hits.hits.0._explanation.details.1.details.0.description: "/text_similarity_reranker.match.using.inference.endpoint:.\\[my-rerank-model\\].on.document.field:.\\[inference_text_field\\].*/" }
- match: {hits.hits.0._explanation.details.1.details.0.details.0.description: "/weight.*astronomy.*/" } - match: {hits.hits.0._explanation.details.1.details.0.details.0.description: "/weight.*astronomy.*/" }