mirror of
https://github.com/elastic/elasticsearch.git
synced 2025-06-28 09:28:55 -04:00
Fix minmax normalizer handling of single-doc result sets (#128689)
This commit is contained in:
parent
d597e50117
commit
adda402a4c
4 changed files with 54 additions and 10 deletions
5
docs/changelog/128689.yaml
Normal file
5
docs/changelog/128689.yaml
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
pr: 128689
|
||||||
|
summary: Fix minmax normalizer handling of single-doc result sets
|
||||||
|
area: Search
|
||||||
|
type: bug
|
||||||
|
issues: []
|
|
@ -13,6 +13,7 @@ import org.elasticsearch.features.NodeFeature;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import static org.elasticsearch.search.retriever.CompoundRetrieverBuilder.INNER_RETRIEVERS_FILTER_SUPPORT;
|
import static org.elasticsearch.search.retriever.CompoundRetrieverBuilder.INNER_RETRIEVERS_FILTER_SUPPORT;
|
||||||
|
import static org.elasticsearch.xpack.rank.linear.MinMaxScoreNormalizer.LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX;
|
||||||
|
|
||||||
public class RankRRFFeatures implements FeatureSpecification {
|
public class RankRRFFeatures implements FeatureSpecification {
|
||||||
|
|
||||||
|
@ -25,6 +26,6 @@ public class RankRRFFeatures implements FeatureSpecification {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Set<NodeFeature> getTestFeatures() {
|
public Set<NodeFeature> getTestFeatures() {
|
||||||
return Set.of(INNER_RETRIEVERS_FILTER_SUPPORT);
|
return Set.of(INNER_RETRIEVERS_FILTER_SUPPORT, LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,11 +8,13 @@
|
||||||
package org.elasticsearch.xpack.rank.linear;
|
package org.elasticsearch.xpack.rank.linear;
|
||||||
|
|
||||||
import org.apache.lucene.search.ScoreDoc;
|
import org.apache.lucene.search.ScoreDoc;
|
||||||
|
import org.elasticsearch.features.NodeFeature;
|
||||||
|
|
||||||
public class MinMaxScoreNormalizer extends ScoreNormalizer {
|
public class MinMaxScoreNormalizer extends ScoreNormalizer {
|
||||||
|
|
||||||
public static final MinMaxScoreNormalizer INSTANCE = new MinMaxScoreNormalizer();
|
public static final MinMaxScoreNormalizer INSTANCE = new MinMaxScoreNormalizer();
|
||||||
|
|
||||||
|
public static final NodeFeature LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX = new NodeFeature("linear_retriever.minmax_single_doc_fix");
|
||||||
|
|
||||||
public static final String NAME = "minmax";
|
public static final String NAME = "minmax";
|
||||||
|
|
||||||
private static final float EPSILON = 1e-6f;
|
private static final float EPSILON = 1e-6f;
|
||||||
|
@ -54,7 +56,9 @@ public class MinMaxScoreNormalizer extends ScoreNormalizer {
|
||||||
for (int i = 0; i < docs.length; i++) {
|
for (int i = 0; i < docs.length; i++) {
|
||||||
float score;
|
float score;
|
||||||
if (minEqualsMax) {
|
if (minEqualsMax) {
|
||||||
score = min;
|
// This can happen if there is only one doc in the result set or if all docs have nearly equivalent scores
|
||||||
|
// (i.e. within epsilon). In this case, assign every doc the max normalized score.
|
||||||
|
score = 1.0f;
|
||||||
} else {
|
} else {
|
||||||
score = (docs[i].score - min) / (max - min);
|
score = (docs[i].score - min) / (max - min);
|
||||||
}
|
}
|
||||||
|
|
|
@ -930,7 +930,7 @@ setup:
|
||||||
|
|
||||||
|
|
||||||
---
|
---
|
||||||
"linear retriever with custom sort and score for nested retrievers":
|
"linear retriever with custom sort for nested retrievers":
|
||||||
- do:
|
- do:
|
||||||
search:
|
search:
|
||||||
index: test
|
index: test
|
||||||
|
@ -949,17 +949,18 @@ setup:
|
||||||
{
|
{
|
||||||
term: {
|
term: {
|
||||||
keyword: {
|
keyword: {
|
||||||
value: "one" # this will give doc 1 a normalized score of 10 because min == max
|
value: "one"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
term: {
|
term: {
|
||||||
keyword: {
|
keyword: {
|
||||||
value: "two" # this will give doc 2 a normalized score of 10 because min == max
|
value: "two"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} ]
|
}
|
||||||
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
boost: 10.0
|
boost: 10.0
|
||||||
|
@ -1058,11 +1059,11 @@ setup:
|
||||||
size: 2
|
size: 2
|
||||||
|
|
||||||
- match: { hits.total.value: 3 }
|
- match: { hits.total.value: 3 }
|
||||||
- length: {hits.hits: 2}
|
- length: { hits.hits: 2 }
|
||||||
- match: { hits.hits.0._id: "2" }
|
- match: { hits.hits.0._id: "2" }
|
||||||
- close_to: { hits.hits.0._score: { value: 10.5, error: 0.001 } }
|
- close_to: { hits.hits.0._score: { value: 1.5, error: 0.001 } }
|
||||||
- match: { hits.hits.1._id: "1" }
|
- match: { hits.hits.1._id: "1" }
|
||||||
- match: { hits.hits.1._score: 10 }
|
- match: { hits.hits.1._score: 1 }
|
||||||
|
|
||||||
---
|
---
|
||||||
"should throw when rank_window_size is negative":
|
"should throw when rank_window_size is negative":
|
||||||
|
@ -1102,3 +1103,36 @@ setup:
|
||||||
]
|
]
|
||||||
rank_window_size: -10
|
rank_window_size: -10
|
||||||
- match: { status: 400 }
|
- match: { status: 400 }
|
||||||
|
|
||||||
|
---
|
||||||
|
"minmax normalization properly handles a single doc result set":
|
||||||
|
- requires:
|
||||||
|
cluster_features: [ "linear_retriever.minmax_single_doc_fix" ]
|
||||||
|
reason: "Fix bug where minmax normalizer would emit unnormalized score when handling a single doc result set"
|
||||||
|
|
||||||
|
- do:
|
||||||
|
search:
|
||||||
|
index: test
|
||||||
|
body:
|
||||||
|
retriever:
|
||||||
|
linear:
|
||||||
|
retrievers: [
|
||||||
|
{
|
||||||
|
retriever: {
|
||||||
|
standard: {
|
||||||
|
query: {
|
||||||
|
term: {
|
||||||
|
"keyword": {
|
||||||
|
"value": "one"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
normalizer: "minmax"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
- match: { hits.total.value: 1 }
|
||||||
|
- match: { hits.hits.0._id: "1" }
|
||||||
|
- match: { hits.hits.0._score: 1.0 }
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue