Fix minmax normalizer handling of single-doc result sets (#128689)

This commit is contained in:
Mike Pellegrini 2025-06-02 09:39:44 -04:00 committed by GitHub
parent d597e50117
commit adda402a4c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 54 additions and 10 deletions

View file

@ -0,0 +1,5 @@
pr: 128689
summary: Fix minmax normalizer handling of single-doc result sets
area: Search
type: bug
issues: []

View file

@ -13,6 +13,7 @@ import org.elasticsearch.features.NodeFeature;
import java.util.Set;
import static org.elasticsearch.search.retriever.CompoundRetrieverBuilder.INNER_RETRIEVERS_FILTER_SUPPORT;
import static org.elasticsearch.xpack.rank.linear.MinMaxScoreNormalizer.LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX;
public class RankRRFFeatures implements FeatureSpecification {
@ -25,6 +26,6 @@ public class RankRRFFeatures implements FeatureSpecification {
@Override
public Set<NodeFeature> getTestFeatures() {
return Set.of(INNER_RETRIEVERS_FILTER_SUPPORT);
return Set.of(INNER_RETRIEVERS_FILTER_SUPPORT, LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX);
}
}

View file

@ -8,11 +8,13 @@
package org.elasticsearch.xpack.rank.linear;
import org.apache.lucene.search.ScoreDoc;
import org.elasticsearch.features.NodeFeature;
public class MinMaxScoreNormalizer extends ScoreNormalizer {
public static final MinMaxScoreNormalizer INSTANCE = new MinMaxScoreNormalizer();
public static final NodeFeature LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX = new NodeFeature("linear_retriever.minmax_single_doc_fix");
public static final String NAME = "minmax";
private static final float EPSILON = 1e-6f;
@ -54,7 +56,9 @@ public class MinMaxScoreNormalizer extends ScoreNormalizer {
for (int i = 0; i < docs.length; i++) {
float score;
if (minEqualsMax) {
score = min;
// This can happen if there is only one doc in the result set or if all docs have nearly equivalent scores
// (i.e. within epsilon). In this case, assign every doc the max normalized score.
score = 1.0f;
} else {
score = (docs[i].score - min) / (max - min);
}

View file

@ -930,7 +930,7 @@ setup:
---
"linear retriever with custom sort and score for nested retrievers":
"linear retriever with custom sort for nested retrievers":
- do:
search:
index: test
@ -949,17 +949,18 @@ setup:
{
term: {
keyword: {
value: "one" # this will give doc 1 a normalized score of 10 because min == max
value: "one"
}
}
},
{
term: {
keyword: {
value: "two" # this will give doc 2 a normalized score of 10 because min == max
value: "two"
}
}
} ]
}
]
}
},
boost: 10.0
@ -1058,11 +1059,11 @@ setup:
size: 2
- match: { hits.total.value: 3 }
- length: {hits.hits: 2}
- length: { hits.hits: 2 }
- match: { hits.hits.0._id: "2" }
- close_to: { hits.hits.0._score: { value: 10.5, error: 0.001 } }
- close_to: { hits.hits.0._score: { value: 1.5, error: 0.001 } }
- match: { hits.hits.1._id: "1" }
- match: { hits.hits.1._score: 10 }
- match: { hits.hits.1._score: 1 }
---
"should throw when rank_window_size is negative":
@ -1102,3 +1103,36 @@ setup:
]
rank_window_size: -10
- match: { status: 400 }
---
# Regression test for the minmax normalizer when a sub-retriever returns a single document.
# With only one score to normalize, min == max; the normalizer is expected to assign the
# maximum normalized score (1.0) instead of passing the raw query score through.
"minmax normalization properly handles a single doc result set":
- requires:
cluster_features: [ "linear_retriever.minmax_single_doc_fix" ]
reason: "Fix bug where minmax normalizer would emit unnormalized score when handling a single doc result set"
- do:
search:
index: test
body:
retriever:
linear:
retrievers: [
{
retriever: {
standard: {
query: {
# Term query expected to match exactly one document (see the hits.total.value assertion below).
term: {
"keyword": {
"value": "one"
}
}
}
}
},
normalizer: "minmax"
}
]
# Exactly one hit comes back, so the normalizer's min and max scores are the same value.
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "1" }
# The single hit must carry the max normalized score rather than its unnormalized score.
- match: { hits.hits.0._score: 1.0 }