Update test_reranking_service to try and parse provided inputs as scores (#122328)

2025-07-18 19:54:14 -04:00 · 2025-02-14 12:30:10 +02:00 · 2025-02-14 12:30:10 +02:00 · 95f8454e40
commit 95f8454e40
parent 37f974546d
4 changed files with 156 additions and 55 deletions
--- a/x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestRerankingServiceExtension.java
+++ b/x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestRerankingServiceExtension.java
@ -36,6 +36,7 @@ import org.elasticsearch.xpack.core.inference.results.RankedDocsResults;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Comparator;
 import java.util.EnumSet;
 import java.util.HashMap;
 import java.util.List;
@ -148,17 +149,29 @@ public class TestRerankingServiceExtension implements InferenceServiceExtension
        }
        /**
         * Builds the mock rerank response for the given input texts.
         *
         * <p>First tries to interpret every input string as a float and use that value directly as
         * the document's score, which lets tests dictate the ranking through the field content.
         * If any single input is not a parseable float, the whole call falls back to generated
         * scores built from a random base value.
         *
         * @param input the texts to rank; each element's position is used as the document index
         * @return the ranked documents, either sorted by the parsed scores or in the generated
         *         fallback order
         */
        private RankedDocsResults makeResults(List<String> input) {
            // NOTE(review): in this diff view the next declaration looks like the removed pre-change line
            // (its '-' marker seems to be missing); both branches below declare their own list - confirm.
            List<RankedDocsResults.RankedDoc> results = new ArrayList<>();
            int totalResults = input.size();
            try {
                List<RankedDocsResults.RankedDoc> results = new ArrayList<>();
                for (int i = 0; i < totalResults; i++) {
                    // Float.parseFloat throws NumberFormatException on non-numeric text, which aborts
                    // this branch entirely and discards the partially built list.
                    results.add(new RankedDocsResults.RankedDoc(i, Float.parseFloat(input.get(i)), input.get(i)));
                }
                // Sorted in reverse natural order of RankedDoc; presumably that is highest score first -
                // confirm against RankedDoc's compareTo.
                return new RankedDocsResults(results.stream().sorted(Comparator.reverseOrder()).toList());
            } catch (NumberFormatException ex) {
                // Fallback for non-numeric inputs: generated scores that decrease by resultDiff per entry.
                List<RankedDocsResults.RankedDoc> results = new ArrayList<>();
                // Random base score; presumably drawn from [-1, 1) - confirm against the type of `random`.
                float minScore = random.nextFloat(-1f, 1f);
                float resultDiff = 0.2f;
                for (int i = 0; i < input.size(); i++) {
                    // Entry i refers to input position (totalResults - 1 - i), so the last input receives
                    // the highest generated score and the first input the lowest. The changed lines below
                    // make the attached text come from that same position rather than from position i.
                    results.add(
-                    new RankedDocsResults.RankedDoc(totalResults - 1 - i, minScore + resultDiff * (totalResults - i), input.get(i))
+                        new RankedDocsResults.RankedDoc(
                            totalResults - 1 - i,
                            minScore + resultDiff * (totalResults - i),
                            input.get(totalResults - 1 - i)
                        )
                    );
                }
                // Already in descending score order by construction, so no sort is applied here.
                return new RankedDocsResults(results);
            }
        }
        protected ServiceSettings getServiceSettingsFromMap(Map<String, Object> serviceSettingsMap) {
            return TestServiceSettings.fromMap(serviceSettingsMap);
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java
@ -27,6 +27,9 @@ public class InferenceFeatures implements FeatureSpecification {
    private static final NodeFeature SEMANTIC_TEXT_HIGHLIGHTER = new NodeFeature("semantic_text.highlighter");
    private static final NodeFeature SEMANTIC_TEXT_HIGHLIGHTER_DEFAULT = new NodeFeature("semantic_text.highlighter.default");
    // Feature flag that the YAML REST tests reference through `cluster_features` so they only run
    // where the test reranking service parses its input text as a score.
    private static final NodeFeature TEST_RERANKING_SERVICE_PARSE_TEXT_AS_SCORE = new NodeFeature(
        "test_reranking_service.parse_text_as_score"
    );
    @Override
    public Set<NodeFeature> getTestFeatures() {
@ -45,7 +48,8 @@ public class InferenceFeatures implements FeatureSpecification {
            TextSimilarityRankRetrieverBuilder.TEXT_SIMILARITY_RERANKER_ALIAS_HANDLING_FIX,
            SemanticInferenceMetadataFieldsMapper.INFERENCE_METADATA_FIELDS_ENABLED_BY_DEFAULT,
            SEMANTIC_TEXT_HIGHLIGHTER_DEFAULT,
-            SEMANTIC_KNN_FILTER_FIX
+            SEMANTIC_KNN_FILTER_FIX,
            TEST_RERANKING_SERVICE_PARSE_TEXT_AS_SCORE
        );
    }
 }
--- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml
+++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml
@ -2,7 +2,6 @@ setup:
  - skip:
      features:
        - close_to
        - contains
  - requires:
      test_runner_features: "close_to"
@ -33,16 +32,8 @@ setup:
                type: keyword
              subtopic:
                type: keyword
-
+              inference_text_field:
-  - do:
+                type: text
      index:
        index: test-index
        id: doc_1
        body:
          text: "As seen from Earth, a solar eclipse happens when the Moon is directly between the Earth and the Sun."
          topic: [ "science" ]
          subtopic: [ "technology" ]
        refresh: true
  - do:
      index:
@ -52,6 +43,7 @@ setup:
          text: "The phases of the Moon come from the position of the Moon relative to the Earth and Sun."
          topic: [ "science" ]
          subtopic: [ "astronomy" ]
          inference_text_field: "0"
        refresh: true
  - do:
@ -61,11 +53,27 @@ setup:
        body:
          text: "Sun Moon Lake is a lake in Nantou County, Taiwan. It is the largest lake in Taiwan."
          topic: [ "geography" ]
          inference_text_field: "1"
        refresh: true
  # Index doc_1 with a numeric-looking inference_text_field ("-1"). The tests in this file rerank
  # on inference_text_field, and the test reranking service parses that text as the score, so
  # doc_1 gets a lower score than the documents indexed above with "0" and "1".
  - do:
      index:
        index: test-index
        id: doc_1
        body:
          text: "As seen from Earth, a solar eclipse happens when the Moon is directly between the Earth and the Sun."
          topic: [ "science" ]
          subtopic: [ "technology" ]
          inference_text_field: "-1"
        refresh: true
 ---
 "Simple text similarity rank retriever":
  - requires:
      cluster_features: "test_reranking_service.parse_text_as_score"
      reason: test_reranking_service can now parse provided input as score to provide deterministic ranks
  - do:
      search:
        index: test-index
@ -75,14 +83,37 @@ setup:
          retriever:
            text_similarity_reranker:
              retriever:
                # this one returns docs 1 and 2
                standard:
                  query:
-                    term:
+                    bool: {
-                      topic: "science"
+                      should: [
                        {
                          constant_score: {
                            filter: {
                              term: {
                                subtopic: "technology"
                              }
                            },
                            boost: 10
                          }
                        },
                        {
                          constant_score: {
                            filter: {
                              term: {
                                subtopic: "astronomy"
                              }
                            },
                            boost: 1
                          }
                        }
                      ]
                    }
              rank_window_size: 10
              inference_id: my-rerank-model
              inference_text: "How often does the moon hide the sun?"
-              field: text
+              field: inference_text_field
          size: 10
  - match: { hits.total.value: 2 }
@ -94,6 +125,10 @@ setup:
 ---
 "Simple text similarity rank retriever and filtering":
  - requires:
      cluster_features: "test_reranking_service.parse_text_as_score"
      reason: test_reranking_service can now parse provided input as score to provide deterministic ranks
  - do:
      search:
        index: test-index
@ -103,6 +138,7 @@ setup:
          retriever:
            text_similarity_reranker:
              retriever:
                # this one returns doc 1
                standard:
                  query:
                    term:
@ -113,7 +149,7 @@ setup:
              rank_window_size: 10
              inference_id: my-rerank-model
              inference_text: "How often does the moon hide the sun?"
-              field: text
+              field: inference_text_field
          size: 10
  - match: { hits.total.value: 1 }
@ -143,7 +179,7 @@ setup:
              rank_window_size: 10
              inference_id: i-dont-exist
              inference_text: "How often does the moon hide the sun?"
-              field: text
+              field: inference_text_field
          size: 10
 ---
@ -169,13 +205,17 @@ setup:
              rank_window_size: 10
              inference_id: i-dont-exist
              inference_text: "asdfasdf"
-              field: text
+              field: inference_text_field
          size: 10
 ---
 "text similarity reranking with explain":
  - requires:
      cluster_features: "test_reranking_service.parse_text_as_score"
      reason: test_reranking_service can now parse provided input as score to provide deterministic ranks
  - do:
      search:
        index: test-index
@ -186,10 +226,32 @@ setup:
            text_similarity_reranker: {
              retriever:
                {
                  # this one returns doc 1 and 2
                  standard: {
                    query: {
                      bool: {
                        should: [
                          {
                            constant_score: {
                              filter: {
                                term: {
-                        topic: "science"
+                                  subtopic: "technology"
                                }
                              },
                              boost: 10
                            }
                          },
                          {
                            constant_score: {
                              filter: {
                                term: {
                                  subtopic: "astronomy"
                                }
                              },
                              boost: 1
                            }
                          }
                        ]
                      }
                    }
                  }
@ -197,17 +259,17 @@ setup:
              rank_window_size: 10,
              inference_id: my-rerank-model,
              inference_text: "How often does the moon hide the sun?",
-              field: text
+              field: inference_text_field
            }
          }
          size: 10
          explain: true
-  - contains: { hits.hits: { _id: "doc_2" } }
+  - match: { hits.hits.0._id: "doc_2" }
-  - contains: { hits.hits: { _id: "doc_1" } }
+  - match: { hits.hits.1._id: "doc_1" }
-  - match: {hits.hits.0._explanation.description: "/text_similarity_reranker.match.using.inference.endpoint:.\\[my-rerank-model\\].on.document.field:.\\[text\\].*/" }
+  - match: {hits.hits.0._explanation.description: "/text_similarity_reranker.match.using.inference.endpoint:.\\[my-rerank-model\\].on.document.field:.\\[inference_text_field\\].*/" }
-  - match: {hits.hits.0._explanation.details.0.description: "/weight.*science.*/" }
+  - match: {hits.hits.0._explanation.details.0.details.0.description: "/subtopic.*astronomy.*/" }
 ---
 "text similarity reranker properly handles aliases":
@ -281,7 +343,7 @@ setup:
              rank_window_size: 10
              inference_id: my-rerank-model
              inference_text: "How often does the moon hide the sun?"
-              field: text
+              field: inference_text_field
          size: 10
  - match: { hits.total.value: 1 }
--- a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/800_rrf_with_text_similarity_reranker_retriever.yml
+++ b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/800_rrf_with_text_similarity_reranker_retriever.yml
@ -39,15 +39,9 @@ setup:
                type: keyword
              integer:
                type: integer
              inference_text_field:
                type: text
  - do:
      index:
        index: test-index
        id: doc_1
        body:
          text: "Sun Moon Lake is a lake in Nantou County, Taiwan. It is the largest lake in Taiwan."
          topic: [ "geography" ]
          integer: 1
  - do:
      index:
@ -58,6 +52,7 @@ setup:
          topic: [ "science" ]
          subtopic: [ "astronomy" ]
          integer: 2
          inference_text_field: "0"
  - do:
      index:
@ -68,6 +63,17 @@ setup:
          topic: [ "science" ]
          subtopic: [ "technology" ]
          integer: 3
          inference_text_field: "1"
  # Index doc_1 with inference_text_field "-1". The reranker tests in this file use
  # inference_text_field as the rerank field, and the test reranking service parses that text as
  # the score, so doc_1 scores below the documents indexed above with "0" and "1".
  - do:
      index:
        index: test-index
        id: doc_1
        body:
          text: "Sun Moon Lake is a lake in Nantou County, Taiwan. It is the largest lake in Taiwan."
          topic: [ "geography" ]
          integer: 1
          inference_text_field: "-1"
  - do:
      indices.refresh: {}
@ -75,6 +81,10 @@ setup:
 ---
 "rrf retriever with a nested text similarity reranker":
  - requires:
      cluster_features: "test_reranking_service.parse_text_as_score"
      reason: test_reranking_service can now parse provided input as score to provide deterministic ranks
  - do:
      search:
        index: test-index
@ -86,6 +96,7 @@ setup:
              retrievers:
                [
                  {
                    # this one retrieves docs 1 and 2
                    standard: {
                      query: {
                        bool: {
@ -120,13 +131,14 @@ setup:
                    }
                  },
                  {
                    # this one retrieves doc 2
                    text_similarity_reranker: {
                      retriever:
                        {
                          standard: {
                            query: {
                              term: {
-                                topic: "science"
+                                subtopic: "astronomy"
                              }
                            }
                          }
@ -134,7 +146,7 @@ setup:
                      rank_window_size: 10,
                      inference_id: my-rerank-model,
                      inference_text: "How often does the moon hide the sun?",
-                      field: text
+                      field: inference_text_field
                    }
                  }
                ],
@ -149,20 +161,21 @@ setup:
                field: topic
                size: 10
-  - match: { hits.total.value: 3 }
+  - match: { hits.total.value: 2 }
-  - length: { hits.hits: 2 }
+  - length: { hits.hits: 1 }
-  - contains: { hits.hits: { _id: "doc_1" } }
+  - match: { hits.hits.0._id: "doc_1" }
  - contains: { hits.hits: { _id: "doc_3" } }
-  - match: { aggregations.topics.buckets.0.key: "science" }
+  - contains:  { aggregations.topics.buckets: { key: "geography", doc_count: 1 } }
-  - match: { aggregations.topics.buckets.0.doc_count: 2 }
+  - contains:  { aggregations.topics.buckets: { key: "science", doc_count: 1 } }
  - match: { aggregations.topics.buckets.1.key: "geography" }
  - match: { aggregations.topics.buckets.1.doc_count: 1 }
 ---
 "Text similarity reranker on top of an RRF retriever":
  - requires:
      cluster_features: "test_reranking_service.parse_text_as_score"
      reason: test_reranking_service can now parse provided input as score to provide deterministic ranks
  - do:
      search:
        index: test-index
@ -174,10 +187,12 @@ setup:
              text_similarity_reranker: {
                retriever:
                  {
                    # this one retrieves docs 1 and 3 so final ranking should be 3 and 1 based on reranking service
                    rrf: {
                      retrievers:
                        [
                          {
                            # this one retrieves docs 1 and 3
                            standard: {
                              query: {
                                bool: {
@ -212,6 +227,7 @@ setup:
                            }
                          },
                          {
                            # this one retrieves doc 1
                            standard: {
                              query: {
                                term: {
@ -228,7 +244,7 @@ setup:
                rank_window_size: 10,
                inference_id: my-rerank-model,
                inference_text: "How often does the moon hide the sun?",
-                field: text
+                field: inference_text_field
              }
            }
          size: 10
@ -253,6 +269,10 @@ setup:
 ---
 "explain using rrf retriever and text-similarity":
  - requires:
      cluster_features: "test_reranking_service.parse_text_as_score"
      reason: test_reranking_service can now parse provided input as score to provide deterministic ranks
  - do:
      search:
        index: test-index
@ -264,6 +284,7 @@ setup:
              retrievers:
                [
                  {
                    # this one retrieves docs 1 and 2
                    standard: {
                      query: {
                        bool: {
@ -298,6 +319,7 @@ setup:
                    }
                  },
                  {
                    # this one retrieves doc 2
                    text_similarity_reranker: {
                      retriever:
                        {
@ -312,7 +334,7 @@ setup:
                      rank_window_size: 10,
                      inference_id: my-rerank-model,
                      inference_text: "How often does the moon hide the sun?",
-                      field: text
+                      field: inference_text_field
                    }
                  }
                ],
@ -332,5 +354,5 @@ setup:
  - match: {hits.hits.0._explanation.details.0.details.0.details.0.description: "/ConstantScore.*/" }
  - match: {hits.hits.0._explanation.details.1.value: 1}
  - match: {hits.hits.0._explanation.details.1.description: "/rrf.score:.\\[0.5\\].*/" }
-  - match: {hits.hits.0._explanation.details.1.details.0.description: "/text_similarity_reranker.match.using.inference.endpoint:.\\[my-rerank-model\\].on.document.field:.\\[text\\].*/" }
+  - match: {hits.hits.0._explanation.details.1.details.0.description: "/text_similarity_reranker.match.using.inference.endpoint:.\\[my-rerank-model\\].on.document.field:.\\[inference_text_field\\].*/" }
  - match: {hits.hits.0._explanation.details.1.details.0.details.0.description: "/weight.*astronomy.*/" }