Fix minmax normalizer handling of single-doc result sets (#128689)

This commit is contained in:
Mike Pellegrini 2025-06-02 09:39:44 -04:00 committed by GitHub
parent d597e50117
commit adda402a4c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 54 additions and 10 deletions

View file

@ -0,0 +1,5 @@
pr: 128689
summary: Fix minmax normalizer handling of single-doc result sets
area: Search
type: bug
issues: []

View file

@ -13,6 +13,7 @@ import org.elasticsearch.features.NodeFeature;
import java.util.Set; import java.util.Set;
import static org.elasticsearch.search.retriever.CompoundRetrieverBuilder.INNER_RETRIEVERS_FILTER_SUPPORT; import static org.elasticsearch.search.retriever.CompoundRetrieverBuilder.INNER_RETRIEVERS_FILTER_SUPPORT;
import static org.elasticsearch.xpack.rank.linear.MinMaxScoreNormalizer.LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX;
public class RankRRFFeatures implements FeatureSpecification { public class RankRRFFeatures implements FeatureSpecification {
@ -25,6 +26,6 @@ public class RankRRFFeatures implements FeatureSpecification {
@Override @Override
public Set<NodeFeature> getTestFeatures() { public Set<NodeFeature> getTestFeatures() {
return Set.of(INNER_RETRIEVERS_FILTER_SUPPORT); return Set.of(INNER_RETRIEVERS_FILTER_SUPPORT, LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX);
} }
} }

View file

@ -8,11 +8,13 @@
package org.elasticsearch.xpack.rank.linear; package org.elasticsearch.xpack.rank.linear;
import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.ScoreDoc;
import org.elasticsearch.features.NodeFeature;
public class MinMaxScoreNormalizer extends ScoreNormalizer { public class MinMaxScoreNormalizer extends ScoreNormalizer {
public static final MinMaxScoreNormalizer INSTANCE = new MinMaxScoreNormalizer(); public static final MinMaxScoreNormalizer INSTANCE = new MinMaxScoreNormalizer();
public static final NodeFeature LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX = new NodeFeature("linear_retriever.minmax_single_doc_fix");
public static final String NAME = "minmax"; public static final String NAME = "minmax";
private static final float EPSILON = 1e-6f; private static final float EPSILON = 1e-6f;
@ -54,7 +56,9 @@ public class MinMaxScoreNormalizer extends ScoreNormalizer {
for (int i = 0; i < docs.length; i++) { for (int i = 0; i < docs.length; i++) {
float score; float score;
if (minEqualsMax) { if (minEqualsMax) {
score = min; // This can happen if there is only one doc in the result set or if all docs have nearly equivalent scores
// (i.e. within epsilon). In this case, assign every doc the maximum normalized score (1.0).
score = 1.0f;
} else { } else {
score = (docs[i].score - min) / (max - min); score = (docs[i].score - min) / (max - min);
} }

View file

@ -930,7 +930,7 @@ setup:
--- ---
"linear retriever with custom sort and score for nested retrievers": "linear retriever with custom sort for nested retrievers":
- do: - do:
search: search:
index: test index: test
@ -949,17 +949,18 @@ setup:
{ {
term: { term: {
keyword: { keyword: {
value: "one" # this will give doc 1 a normalized score of 10 because min == max value: "one"
} }
} }
}, },
{ {
term: { term: {
keyword: { keyword: {
value: "two" # this will give doc 2 a normalized score of 10 because min == max value: "two"
} }
} }
} ] }
]
} }
}, },
boost: 10.0 boost: 10.0
@ -1058,11 +1059,11 @@ setup:
size: 2 size: 2
- match: { hits.total.value: 3 } - match: { hits.total.value: 3 }
- length: {hits.hits: 2} - length: { hits.hits: 2 }
- match: { hits.hits.0._id: "2" } - match: { hits.hits.0._id: "2" }
- close_to: { hits.hits.0._score: { value: 10.5, error: 0.001 } } - close_to: { hits.hits.0._score: { value: 1.5, error: 0.001 } }
- match: { hits.hits.1._id: "1" } - match: { hits.hits.1._id: "1" }
- match: { hits.hits.1._score: 10 } - match: { hits.hits.1._score: 1 }
--- ---
"should throw when rank_window_size is negative": "should throw when rank_window_size is negative":
@ -1102,3 +1103,36 @@ setup:
] ]
rank_window_size: -10 rank_window_size: -10
- match: { status: 400 } - match: { status: 400 }
---
"minmax normalization properly handles a single doc result set":
- requires:
cluster_features: [ "linear_retriever.minmax_single_doc_fix" ]
reason: "Fix bug where minmax normalizer would emit unnormalized score when handling a single doc result set"
- do:
search:
index: test
body:
retriever:
linear:
retrievers: [
{
retriever: {
standard: {
query: {
term: {
"keyword": {
"value": "one"
}
}
}
}
},
normalizer: "minmax"
}
]
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "1" }
- match: { hits.hits.0._score: 1.0 }