elasticsearch/docs/reference/tab-widgets/highlighting-multi-fields.asciidoc
Mayya Sharipova 2337eb05a0
Unified Highlighter to support matched_fields (#107640)
Add support to the Unified highlighter to combine matches on multiple fields
to highlight a single field: "matched_fields".

Based on Lucene PR: https://github.com/apache/lucene/pull/13268

Lucene PR is based on the concept of masked fields where masked fields
are different from the original highlighted field. This PR in
Elasticsearch uses the already existing highlighter parameter
"matched_fields".
2024-05-09 10:35:29 -04:00

465 lines
10 KiB
Text

// tag::unified[]
In the following examples, `comment` is analyzed by the `standard`
analyzer and `comment.english` is analyzed by the `english` analyzer.
[source,console]
--------------------------------------------------
PUT index1
{
"mappings": {
"properties": {
"comment": {
"type": "text",
"analyzer": "standard",
"fields": {
"english": {
"type": "text",
"analyzer": "english"
}
}
}
}
}
}
--------------------------------------------------
[source,console]
--------------------------------------------------
PUT index1/_bulk?refresh=true
{"index": {"_id": "doc1" }}
{"comment": "run with scissors"}
{ "index" : {"_id": "doc2"} }
{"comment": "running with scissors"}
--------------------------------------------------
// TEST[continued]
[source,console]
--------------------------------------------------
GET index1/_search
{
"query": {
"query_string": {
"query": "running with scissors",
"fields": ["comment", "comment.english"]
}
},
"highlight": {
"order": "score",
"fields": {
"comment": {}
}
}
}
--------------------------------------------------
// TEST[continued]
The above request matches both "run with scissors" and "running with scissors"
and would highlight "running" and "scissors" but not "run". If both
phrases appear in a large document then "running with scissors" is
sorted above "run with scissors" in the fragments list because there
are more matches in that fragment.
[source,console-result]
----
{
...
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score": 1.0577903,
"hits" : [
{
"_index" : "index1",
"_id" : "doc2",
"_score" : 1.0577903,
"_source" : {
"comment" : "running with scissors"
},
"highlight" : {
"comment" : [
"<em>running</em> <em>with</em> <em>scissors</em>"
]
}
},
{
"_index" : "index1",
"_id" : "doc1",
"_score" : 0.36464313,
"_source" : {
"comment" : "run with scissors"
},
"highlight" : {
"comment" : [
"run <em>with</em> <em>scissors</em>"
]
}
}
]
}
}
----
// TESTRESPONSE[s/\.\.\./"took" : $body.took,"timed_out" : $body.timed_out,"_shards" : $body._shards,/]
The below request highlights "run" as well as "running" and "scissors",
because the `matched_fields` parameter instructs that for highlighting
we need to combine matches from the `comment.english` field with
the matches from the original `comment` field.
[source,console]
--------------------------------------------------
GET index1/_search
{
"query": {
"query_string": {
"query": "running with scissors",
"fields": ["comment", "comment.english"]
}
},
"highlight": {
"order": "score",
"fields": {
"comment": {
"matched_fields": ["comment.english"]
}
}
}
}
--------------------------------------------------
// TEST[continued]
[source,console-result]
----
{
...
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score": 1.0577903,
"hits" : [
{
"_index" : "index1",
"_id" : "doc2",
"_score" : 1.0577903,
"_source" : {
"comment" : "running with scissors"
},
"highlight" : {
"comment" : [
"<em>running</em> <em>with</em> <em>scissors</em>"
]
}
},
{
"_index" : "index1",
"_id" : "doc1",
"_score" : 0.36464313,
"_source" : {
"comment" : "run with scissors"
},
"highlight" : {
"comment" : [
"<em>run</em> <em>with</em> <em>scissors</em>"
]
}
}
]
}
}
----
// TESTRESPONSE[s/\.\.\./"took" : $body.took,"timed_out" : $body.timed_out,"_shards" : $body._shards,/]
// end::unified[]
// tag::fvh[]
In the following examples, `comment` is analyzed by the `standard`
analyzer and `comment.english` is analyzed by the `english` analyzer.
[source,console]
--------------------------------------------------
PUT index2
{
"mappings": {
"properties": {
"comment": {
"type": "text",
"analyzer": "standard",
"term_vector": "with_positions_offsets",
"fields": {
"english": {
"type": "text",
"analyzer": "english",
"term_vector": "with_positions_offsets"
}
}
}
}
}
}
--------------------------------------------------
[source,console]
--------------------------------------------------
PUT index2/_bulk?refresh=true
{"index": {"_id": "doc1" }}
{"comment": "run with scissors"}
{ "index" : {"_id": "doc2"} }
{"comment": "running with scissors"}
--------------------------------------------------
// TEST[continued]
[source,console]
--------------------------------------------------
GET index2/_search
{
"query": {
"query_string": {
"query": "running with scissors",
"fields": ["comment", "comment.english"]
}
},
"highlight": {
"order": "score",
"fields": {
"comment": {
"type" : "fvh"
}
}
}
}
--------------------------------------------------
// TEST[continued]
The above request matches both "run with scissors" and "running with scissors"
and would highlight "running" and "scissors" but not "run". If both
phrases appear in a large document then "running with scissors" is
sorted above "run with scissors" in the fragments list because there
are more matches in that fragment.
[source,console-result]
----
{
...
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score": 1.0577903,
"hits" : [
{
"_index" : "index2",
"_id" : "doc2",
"_score" : 1.0577903,
"_source" : {
"comment" : "running with scissors"
},
"highlight" : {
"comment" : [
"<em>running</em> <em>with</em> <em>scissors</em>"
]
}
},
{
"_index" : "index2",
"_id" : "doc1",
"_score" : 0.36464313,
"_source" : {
"comment" : "run with scissors"
},
"highlight" : {
"comment" : [
"run <em>with</em> <em>scissors</em>"
]
}
}
]
}
}
----
// TESTRESPONSE[s/\.\.\./"took" : $body.took,"timed_out" : $body.timed_out,"_shards" : $body._shards,/]
The below request highlights "run" as well as "running" and "scissors",
because the `matched_fields` parameter instructs that for highlighting
we need to combine matches from the `comment` and `comment.english` fields.
[source,console]
--------------------------------------------------
GET index2/_search
{
"query": {
"query_string": {
"query": "running with scissors",
"fields": ["comment", "comment.english"]
}
},
"highlight": {
"order": "score",
"fields": {
"comment": {
"type" : "fvh",
"matched_fields": ["comment", "comment.english"]
}
}
}
}
--------------------------------------------------
// TEST[continued]
[source,console-result]
----
{
...
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score": 1.0577903,
"hits" : [
{
"_index" : "index2",
"_id" : "doc2",
"_score" : 1.0577903,
"_source" : {
"comment" : "running with scissors"
},
"highlight" : {
"comment" : [
"<em>running</em> <em>with</em> <em>scissors</em>"
]
}
},
{
"_index" : "index2",
"_id" : "doc1",
"_score" : 0.36464313,
"_source" : {
"comment" : "run with scissors"
},
"highlight" : {
"comment" : [
"<em>run</em> <em>with</em> <em>scissors</em>"
]
}
}
]
}
}
----
// TESTRESPONSE[s/\.\.\./"took" : $body.took,"timed_out" : $body.timed_out,"_shards" : $body._shards,/]
The below request wouldn't highlight "run" but shows that
it is just fine not to list the field to which the matches are combined
(`comment.english`) in the matched fields.
[source,console]
--------------------------------------------------
GET index2/_search
{
"query": {
"query_string": {
"query": "running with scissors",
"fields": ["comment", "comment.english"]
}
},
"highlight": {
"order": "score",
"fields": {
"comment.english": {
"type" : "fvh",
"matched_fields": ["comment"]
}
}
}
}
--------------------------------------------------
// TEST[continued]
[source,console-result]
----
{
...
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score": 1.0577903,
"hits" : [
{
"_index" : "index2",
"_id" : "doc2",
"_score" : 1.0577903,
"_source" : {
"comment" : "running with scissors"
},
"highlight" : {
"comment.english" : [
"<em>running</em> <em>with</em> <em>scissors</em>"
]
}
},
{
"_index" : "index2",
"_id" : "doc1",
"_score" : 0.36464313,
"_source" : {
"comment" : "run with scissors"
},
"highlight" : {
"comment.english" : [
"run <em>with</em> <em>scissors</em>"
]
}
}
]
}
}
----
// TESTRESPONSE[s/\.\.\./"took" : $body.took,"timed_out" : $body.timed_out,"_shards" : $body._shards,/]
[NOTE]
===================================================================
There is a small amount of overhead involved with setting
`matched_fields` to a non-empty array so always prefer
[source,js]
--------------------------------------------------
"highlight": {
"fields": {
"comment": {}
}
}
--------------------------------------------------
// NOTCONSOLE
to
[source,js]
--------------------------------------------------
"highlight": {
"fields": {
"comment": {
"matched_fields": ["comment"],
"type" : "fvh"
}
}
}
--------------------------------------------------
// NOTCONSOLE
===================================================================
// end::fvh[]