From adda402a4c2b15d12ccc4174f5bb55ce072c2817 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Mon, 2 Jun 2025 09:39:44 -0400 Subject: [PATCH] Fix minmax normalizer handling of single-doc result sets (#128689) --- docs/changelog/128689.yaml | 5 ++ .../xpack/rank/RankRRFFeatures.java | 3 +- .../rank/linear/MinMaxScoreNormalizer.java | 8 +++- .../test/linear/10_linear_retriever.yml | 48 ++++++++++++++++--- 4 files changed, 54 insertions(+), 10 deletions(-) create mode 100644 docs/changelog/128689.yaml diff --git a/docs/changelog/128689.yaml b/docs/changelog/128689.yaml new file mode 100644 index 000000000000..f5f41c51168c --- /dev/null +++ b/docs/changelog/128689.yaml @@ -0,0 +1,5 @@ +pr: 128689 +summary: Fix minmax normalizer handling of single-doc result sets +area: Search +type: bug +issues: [] diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java index 5966e17f2042..5119c4ee3e7e 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java @@ -13,6 +13,7 @@ import org.elasticsearch.features.NodeFeature; import java.util.Set; import static org.elasticsearch.search.retriever.CompoundRetrieverBuilder.INNER_RETRIEVERS_FILTER_SUPPORT; +import static org.elasticsearch.xpack.rank.linear.MinMaxScoreNormalizer.LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX; public class RankRRFFeatures implements FeatureSpecification { @@ -25,6 +26,6 @@ public class RankRRFFeatures implements FeatureSpecification { @Override public Set<NodeFeature> getTestFeatures() { - return Set.of(INNER_RETRIEVERS_FILTER_SUPPORT); + return Set.of(INNER_RETRIEVERS_FILTER_SUPPORT, LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX); } } diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/MinMaxScoreNormalizer.java 
b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/MinMaxScoreNormalizer.java index 56b42b48a5d4..b41129f29aed 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/MinMaxScoreNormalizer.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/MinMaxScoreNormalizer.java @@ -8,11 +8,13 @@ package org.elasticsearch.xpack.rank.linear; import org.apache.lucene.search.ScoreDoc; +import org.elasticsearch.features.NodeFeature; public class MinMaxScoreNormalizer extends ScoreNormalizer { - public static final MinMaxScoreNormalizer INSTANCE = new MinMaxScoreNormalizer(); + public static final NodeFeature LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX = new NodeFeature("linear_retriever.minmax_single_doc_fix"); + public static final String NAME = "minmax"; private static final float EPSILON = 1e-6f; @@ -54,7 +56,9 @@ public class MinMaxScoreNormalizer extends ScoreNormalizer { for (int i = 0; i < docs.length; i++) { float score; if (minEqualsMax) { - score = min; + // This can happen if there is only one doc in the result set or if all docs have nearly equivalent scores + // (i.e. within epsilon). In this case, assign every doc the max normalized score. 
+ score = 1.0f; } else { score = (docs[i].score - min) / (max - min); } diff --git a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml index 2d644a574647..52ab532462e4 100644 --- a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml +++ b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml @@ -930,7 +930,7 @@ setup: --- -"linear retriever with custom sort and score for nested retrievers": +"linear retriever with custom sort for nested retrievers": - do: search: index: test @@ -949,17 +949,18 @@ setup: { term: { keyword: { - value: "one" # this will give doc 1 a normalized score of 10 because min == max + value: "one" } } }, { term: { keyword: { - value: "two" # this will give doc 2 a normalized score of 10 because min == max + value: "two" } } - } ] + } + ] } }, boost: 10.0 @@ -1058,11 +1059,11 @@ setup: size: 2 - match: { hits.total.value: 3 } - - length: {hits.hits: 2} + - length: { hits.hits: 2 } - match: { hits.hits.0._id: "2" } - - close_to: { hits.hits.0._score: { value: 10.5, error: 0.001 } } + - close_to: { hits.hits.0._score: { value: 1.5, error: 0.001 } } - match: { hits.hits.1._id: "1" } - - match: { hits.hits.1._score: 10 } + - match: { hits.hits.1._score: 1 } --- "should throw when rank_window_size is negative": @@ -1102,3 +1103,36 @@ setup: ] rank_window_size: -10 - match: { status: 400 } + +--- +"minmax normalization properly handles a single doc result set": + - requires: + cluster_features: [ "linear_retriever.minmax_single_doc_fix" ] + reason: "Fix bug where minmax normalizer would emit unnormalized score when handling a single doc result set" + + - do: + search: + index: test + body: + retriever: + linear: + retrievers: [ + { + retriever: { + standard: { + query: { + term: { + "keyword": { + "value": 
"one" + } + } + } + } + }, + normalizer: "minmax" + } + ] + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.0._score: 1.0 }