mirror of
https://github.com/elastic/elasticsearch.git
synced 2025-06-28 09:28:55 -04:00
Enable Mapped Field Types to Override Default Highlighter (#121176)
This commit introduces the `MappedFieldType#getDefaultHighlighter` method, allowing a specific highlighter to be enforced for a field. The semantic field mapper utilizes this new functionality to set the `semantic` highlighter as the default. All other fields will continue to use the `unified` highlighter by default.
This commit is contained in:
parent
6486299371
commit
dbeb55cb3d
9 changed files with 128 additions and 26 deletions
|
@ -133,14 +133,13 @@ You can extract the most relevant fragments from a semantic text field by using
|
|||
POST test-index/_search
|
||||
{
|
||||
"query": {
|
||||
"semantic": {
|
||||
"field": "my_semantic_field"
|
||||
"match": {
|
||||
"my_semantic_field": "Which country is Paris in?"
|
||||
}
|
||||
},
|
||||
"highlight": {
|
||||
"fields": {
|
||||
"my_semantic_field": {
|
||||
"type": "semantic",
|
||||
"number_of_fragments": 2, <1>
|
||||
"order": "score" <2>
|
||||
}
|
||||
|
@ -152,6 +151,33 @@ POST test-index/_search
|
|||
<1> Specifies the maximum number of fragments to return.
|
||||
<2> Sorts highlighted fragments by score when set to `score`. By default, fragments will be output in the order they appear in the field (order: none).
|
||||
|
||||
Highlighting is also supported on fields other than `semantic_text`.
|
||||
However, if you want to restrict highlighting to the semantic highlighter and return no fragments when the field is not of type `semantic_text`,
|
||||
you can explicitly enforce the `semantic` highlighter in the query:
|
||||
|
||||
[source,console]
|
||||
------------------------------------------------------------
|
||||
POST test-index/_search
|
||||
{
|
||||
"query": {
|
||||
"match": {
|
||||
"my_field": "Which country is Paris in?"
|
||||
}
|
||||
},
|
||||
"highlight": {
|
||||
"fields": {
|
||||
"my_field": {
|
||||
"type": "semantic", <1>
|
||||
"number_of_fragments": 2,
|
||||
"order": "score"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
------------------------------------------------------------
|
||||
// TEST[skip:Requires inference endpoint]
|
||||
<1> Ensures that highlighting is applied exclusively to semantic_text fields.
|
||||
|
||||
[discrete]
|
||||
[[custom-indexing]]
|
||||
==== Customizing `semantic_text` indexing
|
||||
|
|
|
@ -37,8 +37,8 @@ GET /_search
|
|||
// TEST[setup:my_index]
|
||||
|
||||
{es} supports three highlighters: `unified`, `plain`, and `fvh` (fast vector
|
||||
highlighter). You can specify the highlighter `type` you want to use
|
||||
for each field.
|
||||
highlighter) for `text` and `keyword` fields and the `semantic` highlighter for `semantic_text` fields.
|
||||
You can specify the highlighter `type` you want to use for each field or rely on the field type's default highlighter.
|
||||
|
||||
[discrete]
|
||||
[[unified-highlighter]]
|
||||
|
@ -48,7 +48,19 @@ highlighter breaks the text into sentences and uses the BM25 algorithm to score
|
|||
individual sentences as if they were documents in the corpus. It also supports
|
||||
accurate phrase and multi-term (fuzzy, prefix, regex) highlighting. The `unified`
|
||||
highlighter can combine matches from multiple fields into one result (see
|
||||
`matched_fields`). This is the default highlighter.
|
||||
`matched_fields`).
|
||||
|
||||
This is the default highlighter for all `text` and `keyword` fields.
|
||||
|
||||
[discrete]
|
||||
[[semantic-highlighter]]
|
||||
==== Semantic Highlighter
|
||||
|
||||
The `semantic` highlighter is specifically designed for use with the <<semantic-text, `semantic_text`>> field.
|
||||
It identifies and extracts the most relevant fragments from the field based on semantic
|
||||
similarity between the query and each fragment.
|
||||
|
||||
By default, <<semantic-text, `semantic_text`>> fields use the semantic highlighter.
|
||||
|
||||
[discrete]
|
||||
[[plain-highlighter]]
|
||||
|
|
|
@ -41,6 +41,7 @@ import org.elasticsearch.index.query.QueryShardException;
|
|||
import org.elasticsearch.index.query.SearchExecutionContext;
|
||||
import org.elasticsearch.search.DocValueFormat;
|
||||
import org.elasticsearch.search.fetch.subphase.FetchFieldsPhase;
|
||||
import org.elasticsearch.search.fetch.subphase.highlight.DefaultHighlighter;
|
||||
import org.elasticsearch.search.lookup.SearchLookup;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -217,6 +218,13 @@ public abstract class MappedFieldType {
|
|||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the default highlighter type to use when highlighting the field.
|
||||
*/
|
||||
public String getDefaultHighlighter() {
|
||||
return DefaultHighlighter.NAME;
|
||||
}
|
||||
|
||||
/** Generates a query that will only match documents that contain the given value.
|
||||
* The default implementation returns a {@link TermQuery} over the value bytes
|
||||
* @throws IllegalArgumentException if {@code value} cannot be converted to the expected data type or if the field is not searchable
|
||||
|
|
|
@ -913,7 +913,7 @@ public class SearchModule {
|
|||
NamedRegistry<Highlighter> highlighters = new NamedRegistry<>("highlighter");
|
||||
highlighters.register("fvh", new FastVectorHighlighter(settings));
|
||||
highlighters.register("plain", new PlainHighlighter());
|
||||
highlighters.register("unified", new DefaultHighlighter());
|
||||
highlighters.register(DefaultHighlighter.NAME, new DefaultHighlighter());
|
||||
highlighters.extractAndRegister(plugins, SearchPlugin::getHighlighters);
|
||||
|
||||
return unmodifiableMap(highlighters.getRegistry());
|
||||
|
|
|
@ -50,6 +50,8 @@ import static org.elasticsearch.lucene.search.uhighlight.CustomUnifiedHighlighte
|
|||
|
||||
public class DefaultHighlighter implements Highlighter {
|
||||
|
||||
public static final String NAME = "unified";
|
||||
|
||||
@Override
|
||||
public boolean canHighlight(MappedFieldType fieldType) {
|
||||
return true;
|
||||
|
|
|
@ -66,7 +66,7 @@ public class HighlightPhase implements FetchSubPhase {
|
|||
Map<String, Function<HitContext, FieldHighlightContext>> contextBuilders = fieldContext.builders;
|
||||
for (String field : contextBuilders.keySet()) {
|
||||
FieldHighlightContext fieldContext = contextBuilders.get(field).apply(hitContext);
|
||||
Highlighter highlighter = getHighlighter(fieldContext.field);
|
||||
Highlighter highlighter = getHighlighter(fieldContext.field, fieldContext.fieldType);
|
||||
HighlightField highlightField = highlighter.highlight(fieldContext);
|
||||
if (highlightField != null) {
|
||||
// Note that we make sure to use the original field name in the response. This is because the
|
||||
|
@ -80,10 +80,10 @@ public class HighlightPhase implements FetchSubPhase {
|
|||
};
|
||||
}
|
||||
|
||||
private Highlighter getHighlighter(SearchHighlightContext.Field field) {
|
||||
private Highlighter getHighlighter(SearchHighlightContext.Field field, MappedFieldType fieldType) {
|
||||
String highlighterType = field.fieldOptions().highlighterType();
|
||||
if (highlighterType == null) {
|
||||
highlighterType = "unified";
|
||||
highlighterType = fieldType.getDefaultHighlighter();
|
||||
}
|
||||
Highlighter highlighter = highlighters.get(highlighterType);
|
||||
if (highlighter == null) {
|
||||
|
@ -103,8 +103,6 @@ public class HighlightPhase implements FetchSubPhase {
|
|||
Map<String, Function<HitContext, FieldHighlightContext>> builders = new LinkedHashMap<>();
|
||||
StoredFieldsSpec storedFieldsSpec = StoredFieldsSpec.NO_REQUIREMENTS;
|
||||
for (SearchHighlightContext.Field field : highlightContext.fields()) {
|
||||
Highlighter highlighter = getHighlighter(field);
|
||||
|
||||
Collection<String> fieldNamesToHighlight = context.getSearchExecutionContext().getMatchingFieldNames(field.field());
|
||||
|
||||
boolean fieldNameContainsWildcards = field.field().contains("*");
|
||||
|
@ -112,6 +110,7 @@ public class HighlightPhase implements FetchSubPhase {
|
|||
boolean sourceRequired = false;
|
||||
for (String fieldName : fieldNamesToHighlight) {
|
||||
MappedFieldType fieldType = context.getSearchExecutionContext().getFieldType(fieldName);
|
||||
Highlighter highlighter = getHighlighter(field, fieldType);
|
||||
|
||||
// We should prevent highlighting if a field is anything but a text, match_only_text,
|
||||
// or keyword field.
|
||||
|
|
|
@ -25,6 +25,7 @@ import static org.elasticsearch.xpack.inference.queries.SemanticSparseVectorQuer
|
|||
public class InferenceFeatures implements FeatureSpecification {
|
||||
|
||||
private static final NodeFeature SEMANTIC_TEXT_HIGHLIGHTER = new NodeFeature("semantic_text.highlighter");
|
||||
private static final NodeFeature SEMANTIC_TEXT_HIGHLIGHTER_DEFAULT = new NodeFeature("semantic_text.highlighter.default");
|
||||
|
||||
@Override
|
||||
public Set<NodeFeature> getTestFeatures() {
|
||||
|
@ -40,7 +41,8 @@ public class InferenceFeatures implements FeatureSpecification {
|
|||
SemanticInferenceMetadataFieldsMapper.EXPLICIT_NULL_FIXES,
|
||||
SEMANTIC_KNN_VECTOR_QUERY_REWRITE_INTERCEPTION_SUPPORTED,
|
||||
TextSimilarityRankRetrieverBuilder.TEXT_SIMILARITY_RERANKER_ALIAS_HANDLING_FIX,
|
||||
SemanticInferenceMetadataFieldsMapper.INFERENCE_METADATA_FIELDS_ENABLED_BY_DEFAULT
|
||||
SemanticInferenceMetadataFieldsMapper.INFERENCE_METADATA_FIELDS_ENABLED_BY_DEFAULT,
|
||||
SEMANTIC_TEXT_HIGHLIGHTER_DEFAULT
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -73,6 +73,7 @@ import org.elasticsearch.xcontent.XContentType;
|
|||
import org.elasticsearch.xpack.core.ml.inference.results.MlTextEmbeddingResults;
|
||||
import org.elasticsearch.xpack.core.ml.inference.results.TextExpansionResults;
|
||||
import org.elasticsearch.xpack.core.ml.search.SparseVectorQueryBuilder;
|
||||
import org.elasticsearch.xpack.inference.highlight.SemanticTextHighlighter;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.UncheckedIOException;
|
||||
|
@ -580,6 +581,11 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFie
|
|||
return TextFieldMapper.CONTENT_TYPE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getDefaultHighlighter() {
|
||||
return SemanticTextHighlighter.NAME;
|
||||
}
|
||||
|
||||
public String getInferenceId() {
|
||||
return inferenceId;
|
||||
}
|
||||
|
|
|
@ -55,22 +55,32 @@ setup:
|
|||
index.mapping.semantic_text.use_legacy_format: false
|
||||
mappings:
|
||||
properties:
|
||||
title:
|
||||
type: text
|
||||
body:
|
||||
type: semantic_text
|
||||
inference_id: dense-inference-id
|
||||
|
||||
---
|
||||
"Highlighting using a sparse embedding model":
|
||||
- do:
|
||||
index:
|
||||
index: test-sparse-index
|
||||
id: doc_1
|
||||
body:
|
||||
title: "Elasticsearch"
|
||||
body: ["ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!"]
|
||||
refresh: true
|
||||
|
||||
- match: { result: created }
|
||||
- do:
|
||||
index:
|
||||
index: test-dense-index
|
||||
id: doc_1
|
||||
body:
|
||||
title: "Elasticsearch"
|
||||
body: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
|
||||
refresh: true
|
||||
|
||||
---
|
||||
"Highlighting using a sparse embedding model":
|
||||
- do:
|
||||
search:
|
||||
index: test-sparse-index
|
||||
|
@ -153,16 +163,6 @@ setup:
|
|||
|
||||
---
|
||||
"Highlighting using a dense embedding model":
|
||||
- do:
|
||||
index:
|
||||
index: test-dense-index
|
||||
id: doc_1
|
||||
body:
|
||||
body: ["ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!"]
|
||||
refresh: true
|
||||
|
||||
- match: { result: created }
|
||||
|
||||
- do:
|
||||
search:
|
||||
index: test-dense-index
|
||||
|
@ -243,4 +243,51 @@ setup:
|
|||
- match: { hits.hits.0.highlight.body.0: "You Know, for Search!" }
|
||||
- match: { hits.hits.0.highlight.body.1: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
|
||||
|
||||
---
|
||||
"Default highlighter for fields":
|
||||
- requires:
|
||||
cluster_features: "semantic_text.highlighter.default"
|
||||
reason: semantic text field defaults to the semantic highlighter
|
||||
|
||||
- do:
|
||||
search:
|
||||
index: test-dense-index
|
||||
body:
|
||||
query:
|
||||
match:
|
||||
body: "What is Elasticsearch?"
|
||||
highlight:
|
||||
fields:
|
||||
body:
|
||||
order: "score"
|
||||
number_of_fragments: 2
|
||||
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- length: { hits.hits.0.highlight.body: 2 }
|
||||
- match: { hits.hits.0.highlight.body.0: "You Know, for Search!" }
|
||||
- match: { hits.hits.0.highlight.body.1: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
|
||||
|
||||
---
|
||||
"semantic highlighter ignores non-inference fields":
|
||||
- requires:
|
||||
cluster_features: "semantic_text.highlighter.default"
|
||||
reason: semantic text field defaults to the semantic highlighter
|
||||
|
||||
- do:
|
||||
search:
|
||||
index: test-dense-index
|
||||
body:
|
||||
query:
|
||||
match:
|
||||
title: "Elasticsearch"
|
||||
highlight:
|
||||
fields:
|
||||
title:
|
||||
type: semantic
|
||||
number_of_fragments: 2
|
||||
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- not_exists: hits.hits.0.highlight.title
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue