Fix minmax normalizer handling of single-doc result sets (#128689)

2025-06-28 09:28:55 -04:00 · 2025-06-02 09:39:44 -04:00 · 2025-06-02 09:39:44 -04:00 · adda402a4c
commit adda402a4c
parent d597e50117
4 changed files with 54 additions and 10 deletions
--- a/docs/changelog/128689.yaml
+++ b/docs/changelog/128689.yaml
@@ -0,0 +1,5 @@
+pr: 128689
+summary: Fix minmax normalizer handling of single-doc result sets
+area: Search
+type: bug
+issues: []
--- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java
+++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java
@@ -13,6 +13,7 @@ import org.elasticsearch.features.NodeFeature;
 import java.util.Set;

 import static org.elasticsearch.search.retriever.CompoundRetrieverBuilder.INNER_RETRIEVERS_FILTER_SUPPORT;
+import static org.elasticsearch.xpack.rank.linear.MinMaxScoreNormalizer.LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX;

 public class RankRRFFeatures implements FeatureSpecification {

@@ -25,6 +26,6 @@ public class RankRRFFeatures implements FeatureSpecification {

    @Override
    public Set<NodeFeature> getTestFeatures() {
-        return Set.of(INNER_RETRIEVERS_FILTER_SUPPORT);
+        return Set.of(INNER_RETRIEVERS_FILTER_SUPPORT, LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX);
    }
 }
--- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/MinMaxScoreNormalizer.java
+++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/MinMaxScoreNormalizer.java
@@ -8,11 +8,13 @@
 package org.elasticsearch.xpack.rank.linear;

 import org.apache.lucene.search.ScoreDoc;
+import org.elasticsearch.features.NodeFeature;

 public class MinMaxScoreNormalizer extends ScoreNormalizer {
-
    public static final MinMaxScoreNormalizer INSTANCE = new MinMaxScoreNormalizer();

+    public static final NodeFeature LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX = new NodeFeature("linear_retriever.minmax_single_doc_fix");
+
    public static final String NAME = "minmax";

    private static final float EPSILON = 1e-6f;
@@ -54,7 +56,9 @@ public class MinMaxScoreNormalizer extends ScoreNormalizer {
        for (int i = 0; i < docs.length; i++) {
            float score;
            if (minEqualsMax) {
-                score = min;
+                // This can happen if there is only one doc in the result set or if all docs have nearly equivalent scores
+                // (i.e. within epsilon). In this case, assign every doc the max normalized score.
+                score = 1.0f;
            } else {
                score = (docs[i].score - min) / (max - min);
            }
--- a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml
+++ b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml
@@ -930,7 +930,7 @@ setup:


 ---
-"linear retriever with custom sort and score for nested retrievers":
+"linear retriever with custom sort for nested retrievers":
  - do:
      search:
        index: test
@@ -949,17 +949,18 @@ setup:
                                {
                                  term: {
                                    keyword: {
-                                      value: "one"  # this will give doc 1 a normalized score of 10 because min == max
+                                      value: "one"
                                    }
                                  }
                                },
                                {
                                  term: {
                                    keyword: {
-                                      value: "two" # this will give doc 2 a normalized score of 10 because min == max
+                                      value: "two"
                                    }
                                  }
-                                } ]
+                                }
+                              ]
                            }
                          },
                          boost: 10.0
@@ -1058,11 +1059,11 @@ setup:
          size: 2

  - match: { hits.total.value: 3 }
-  - length: {hits.hits: 2}
+  - length: { hits.hits: 2 }
  - match: { hits.hits.0._id: "2" }
-  - close_to: { hits.hits.0._score: { value: 10.5, error: 0.001 } }
+  - close_to: { hits.hits.0._score: { value: 1.5, error: 0.001 } }
  - match: { hits.hits.1._id: "1" }
-  - match: { hits.hits.1._score: 10 }
+  - match: { hits.hits.1._score: 1 }

 ---
 "should throw when rank_window_size is negative":
@@ -1102,3 +1103,36 @@ setup:
              ]
              rank_window_size: -10
  - match: { status: 400 }
+
+---
+"minmax normalization properly handles a single doc result set":
+  - requires:
+      cluster_features: [ "linear_retriever.minmax_single_doc_fix" ]
+      reason: "Fix bug where minmax normalizer would emit unnormalized score when handling a single doc result set"
+
+  - do:
+      search:
+        index: test
+        body:
+          retriever:
+            linear:
+              retrievers: [
+                {
+                  retriever: {
+                    standard: {
+                      query: {
+                        term: {
+                          "keyword": {
+                            "value": "one"
+                          }
+                        }
+                      }
+                    }
+                  },
+                  normalizer: "minmax"
+                }
+              ]
+
+  - match: { hits.total.value: 1 }
+  - match: { hits.hits.0._id: "1" }
+  - match: { hits.hits.0._score: 1.0 }