mirror of
https://github.com/elastic/elasticsearch.git
synced 2025-04-25 07:37:19 -04:00
Add support to the Unified highlighter to combine matches on multiple fields to highlight a single field: "matched_fields". Based on Lucene PR: https://github.com/apache/lucene/pull/13268 Lucene PR is based on the concept of masked fields where masked fields are different from the original highlighted field. This PR in Elasticsearch uses the already existing highlighter parameter "matched_fields".
465 lines
10 KiB
Text
465 lines
10 KiB
Text
// tag::unified[]
|
|
|
|
In the following examples, `comment` is analyzed by the `standard`
|
|
analyzer and `comment.english` is analyzed by the `english` analyzer.
|
|
|
|
[source,console]
|
|
--------------------------------------------------
|
|
PUT index1
|
|
{
|
|
"mappings": {
|
|
"properties": {
|
|
"comment": {
|
|
"type": "text",
|
|
"analyzer": "standard",
|
|
"fields": {
|
|
"english": {
|
|
"type": "text",
|
|
"analyzer": "english"
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
--------------------------------------------------
|
|
|
|
|
|
[source,console]
|
|
--------------------------------------------------
|
|
PUT index1/_bulk?refresh=true
|
|
{"index": {"_id": "doc1" }}
|
|
{"comment": "run with scissors"}
|
|
{ "index" : {"_id": "doc2"} }
|
|
{"comment": "running with scissors"}
|
|
|
|
--------------------------------------------------
|
|
// TEST[continued]
|
|
|
|
|
|
[source,console]
|
|
--------------------------------------------------
|
|
GET index1/_search
|
|
{
|
|
"query": {
|
|
"query_string": {
|
|
"query": "running with scissors",
|
|
"fields": ["comment", "comment.english"]
|
|
}
|
|
},
|
|
"highlight": {
|
|
"order": "score",
|
|
"fields": {
|
|
"comment": {}
|
|
}
|
|
}
|
|
}
|
|
--------------------------------------------------
|
|
// TEST[continued]
|
|
|
|
The above request matches both "run with scissors" and "running with scissors"
|
|
and would highlight "running", "with" and "scissors" but not "run". If both
|
|
phrases appear in a large document then "running with scissors" is
|
|
sorted above "run with scissors" in the fragments list because there
|
|
are more matches in that fragment.
|
|
|
|
[source,console-result]
|
|
----
|
|
{
|
|
...
|
|
"hits" : {
|
|
"total" : {
|
|
"value" : 2,
|
|
"relation" : "eq"
|
|
},
|
|
"max_score": 1.0577903,
|
|
"hits" : [
|
|
{
|
|
"_index" : "index1",
|
|
"_id" : "doc2",
|
|
"_score" : 1.0577903,
|
|
"_source" : {
|
|
"comment" : "running with scissors"
|
|
},
|
|
"highlight" : {
|
|
"comment" : [
|
|
"<em>running</em> <em>with</em> <em>scissors</em>"
|
|
]
|
|
}
|
|
},
|
|
{
|
|
"_index" : "index1",
|
|
"_id" : "doc1",
|
|
"_score" : 0.36464313,
|
|
"_source" : {
|
|
"comment" : "run with scissors"
|
|
},
|
|
"highlight" : {
|
|
"comment" : [
|
|
"run <em>with</em> <em>scissors</em>"
|
|
]
|
|
}
|
|
}
|
|
]
|
|
}
|
|
}
|
|
----
|
|
// TESTRESPONSE[s/\.\.\./"took" : $body.took,"timed_out" : $body.timed_out,"_shards" : $body._shards,/]
|
|
|
|
The below request highlights "run" as well as "running" and "scissors",
|
|
because the `matched_fields` parameter instructs that for highlighting
|
|
we need to combine matches from the `comment.english` field with
|
|
the matches from the original `comment` field.
|
|
|
|
[source,console]
|
|
--------------------------------------------------
|
|
GET index1/_search
|
|
{
|
|
"query": {
|
|
"query_string": {
|
|
"query": "running with scissors",
|
|
"fields": ["comment", "comment.english"]
|
|
}
|
|
},
|
|
"highlight": {
|
|
"order": "score",
|
|
"fields": {
|
|
"comment": {
|
|
"matched_fields": ["comment.english"]
|
|
}
|
|
}
|
|
}
|
|
}
|
|
--------------------------------------------------
|
|
// TEST[continued]
|
|
|
|
[source,console-result]
|
|
----
|
|
{
|
|
...
|
|
"hits" : {
|
|
"total" : {
|
|
"value" : 2,
|
|
"relation" : "eq"
|
|
},
|
|
"max_score": 1.0577903,
|
|
"hits" : [
|
|
{
|
|
"_index" : "index1",
|
|
"_id" : "doc2",
|
|
"_score" : 1.0577903,
|
|
"_source" : {
|
|
"comment" : "running with scissors"
|
|
},
|
|
"highlight" : {
|
|
"comment" : [
|
|
"<em>running</em> <em>with</em> <em>scissors</em>"
|
|
]
|
|
}
|
|
},
|
|
{
|
|
"_index" : "index1",
|
|
"_id" : "doc1",
|
|
"_score" : 0.36464313,
|
|
"_source" : {
|
|
"comment" : "run with scissors"
|
|
},
|
|
"highlight" : {
|
|
"comment" : [
|
|
"<em>run</em> <em>with</em> <em>scissors</em>"
|
|
]
|
|
}
|
|
}
|
|
]
|
|
}
|
|
}
|
|
----
|
|
// TESTRESPONSE[s/\.\.\./"took" : $body.took,"timed_out" : $body.timed_out,"_shards" : $body._shards,/]
|
|
|
|
// end::unified[]
|
|
|
|
|
|
|
|
|
|
|
|
// tag::fvh[]
|
|
|
|
In the following examples, `comment` is analyzed by the `standard`
|
|
analyzer and `comment.english` is analyzed by the `english` analyzer.
|
|
|
|
[source,console]
|
|
--------------------------------------------------
|
|
PUT index2
|
|
{
|
|
"mappings": {
|
|
"properties": {
|
|
"comment": {
|
|
"type": "text",
|
|
"analyzer": "standard",
|
|
"term_vector": "with_positions_offsets",
|
|
"fields": {
|
|
"english": {
|
|
"type": "text",
|
|
"analyzer": "english",
|
|
"term_vector": "with_positions_offsets"
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
--------------------------------------------------
|
|
|
|
|
|
[source,console]
|
|
--------------------------------------------------
|
|
PUT index2/_bulk?refresh=true
|
|
{"index": {"_id": "doc1" }}
|
|
{"comment": "run with scissors"}
|
|
{ "index" : {"_id": "doc2"} }
|
|
{"comment": "running with scissors"}
|
|
|
|
--------------------------------------------------
|
|
// TEST[continued]
|
|
|
|
|
|
[source,console]
|
|
--------------------------------------------------
|
|
GET index2/_search
|
|
{
|
|
"query": {
|
|
"query_string": {
|
|
"query": "running with scissors",
|
|
"fields": ["comment", "comment.english"]
|
|
}
|
|
},
|
|
"highlight": {
|
|
"order": "score",
|
|
"fields": {
|
|
"comment": {
|
|
"type" : "fvh"
|
|
}
|
|
}
|
|
}
|
|
}
|
|
--------------------------------------------------
|
|
// TEST[continued]
|
|
|
|
The above request matches both "run with scissors" and "running with scissors"
|
|
and would highlight "running", "with" and "scissors" but not "run". If both
|
|
phrases appear in a large document then "running with scissors" is
|
|
sorted above "run with scissors" in the fragments list because there
|
|
are more matches in that fragment.
|
|
|
|
[source,console-result]
|
|
----
|
|
{
|
|
...
|
|
"hits" : {
|
|
"total" : {
|
|
"value" : 2,
|
|
"relation" : "eq"
|
|
},
|
|
"max_score": 1.0577903,
|
|
"hits" : [
|
|
{
|
|
"_index" : "index2",
|
|
"_id" : "doc2",
|
|
"_score" : 1.0577903,
|
|
"_source" : {
|
|
"comment" : "running with scissors"
|
|
},
|
|
"highlight" : {
|
|
"comment" : [
|
|
"<em>running</em> <em>with</em> <em>scissors</em>"
|
|
]
|
|
}
|
|
},
|
|
{
|
|
"_index" : "index2",
|
|
"_id" : "doc1",
|
|
"_score" : 0.36464313,
|
|
"_source" : {
|
|
"comment" : "run with scissors"
|
|
},
|
|
"highlight" : {
|
|
"comment" : [
|
|
"run <em>with</em> <em>scissors</em>"
|
|
]
|
|
}
|
|
}
|
|
]
|
|
}
|
|
}
|
|
----
|
|
// TESTRESPONSE[s/\.\.\./"took" : $body.took,"timed_out" : $body.timed_out,"_shards" : $body._shards,/]
|
|
|
|
The below request highlights "run" as well as "running" and "scissors",
|
|
because the `matched_fields` parameter instructs that for highlighting
|
|
we need to combine matches from the `comment` and `comment.english` fields.
|
|
|
|
[source,console]
|
|
--------------------------------------------------
|
|
GET index2/_search
|
|
{
|
|
"query": {
|
|
"query_string": {
|
|
"query": "running with scissors",
|
|
"fields": ["comment", "comment.english"]
|
|
}
|
|
},
|
|
"highlight": {
|
|
"order": "score",
|
|
"fields": {
|
|
"comment": {
|
|
"type" : "fvh",
|
|
"matched_fields": ["comment", "comment.english"]
|
|
}
|
|
}
|
|
}
|
|
}
|
|
--------------------------------------------------
|
|
// TEST[continued]
|
|
|
|
[source,console-result]
|
|
----
|
|
{
|
|
...
|
|
"hits" : {
|
|
"total" : {
|
|
"value" : 2,
|
|
"relation" : "eq"
|
|
},
|
|
"max_score": 1.0577903,
|
|
"hits" : [
|
|
{
|
|
"_index" : "index2",
|
|
"_id" : "doc2",
|
|
"_score" : 1.0577903,
|
|
"_source" : {
|
|
"comment" : "running with scissors"
|
|
},
|
|
"highlight" : {
|
|
"comment" : [
|
|
"<em>running</em> <em>with</em> <em>scissors</em>"
|
|
]
|
|
}
|
|
},
|
|
{
|
|
"_index" : "index2",
|
|
"_id" : "doc1",
|
|
"_score" : 0.36464313,
|
|
"_source" : {
|
|
"comment" : "run with scissors"
|
|
},
|
|
"highlight" : {
|
|
"comment" : [
|
|
"<em>run</em> <em>with</em> <em>scissors</em>"
|
|
]
|
|
}
|
|
}
|
|
]
|
|
}
|
|
}
|
|
----
|
|
// TESTRESPONSE[s/\.\.\./"took" : $body.took,"timed_out" : $body.timed_out,"_shards" : $body._shards,/]
|
|
|
|
The below request wouldn't highlight "run" but shows that
|
|
it is just fine not to list the field to which the matches are combined
|
|
(`comment.english`) in the matched fields.
|
|
|
|
[source,console]
|
|
--------------------------------------------------
|
|
GET index2/_search
|
|
{
|
|
"query": {
|
|
"query_string": {
|
|
"query": "running with scissors",
|
|
"fields": ["comment", "comment.english"]
|
|
}
|
|
},
|
|
"highlight": {
|
|
"order": "score",
|
|
"fields": {
|
|
"comment.english": {
|
|
"type" : "fvh",
|
|
"matched_fields": ["comment"]
|
|
}
|
|
}
|
|
}
|
|
}
|
|
--------------------------------------------------
|
|
// TEST[continued]
|
|
|
|
|
|
[source,console-result]
|
|
----
|
|
{
|
|
...
|
|
"hits" : {
|
|
"total" : {
|
|
"value" : 2,
|
|
"relation" : "eq"
|
|
},
|
|
"max_score": 1.0577903,
|
|
"hits" : [
|
|
{
|
|
"_index" : "index2",
|
|
"_id" : "doc2",
|
|
"_score" : 1.0577903,
|
|
"_source" : {
|
|
"comment" : "running with scissors"
|
|
},
|
|
"highlight" : {
|
|
"comment.english" : [
|
|
"<em>running</em> <em>with</em> <em>scissors</em>"
|
|
]
|
|
}
|
|
},
|
|
{
|
|
"_index" : "index2",
|
|
"_id" : "doc1",
|
|
"_score" : 0.36464313,
|
|
"_source" : {
|
|
"comment" : "run with scissors"
|
|
},
|
|
"highlight" : {
|
|
"comment.english" : [
|
|
"run <em>with</em> <em>scissors</em>"
|
|
]
|
|
}
|
|
}
|
|
]
|
|
}
|
|
}
|
|
----
|
|
// TESTRESPONSE[s/\.\.\./"took" : $body.took,"timed_out" : $body.timed_out,"_shards" : $body._shards,/]
|
|
|
|
[NOTE]
|
|
===================================================================
|
|
There is a small amount of overhead involved with setting
|
|
`matched_fields` to a non-empty array so always prefer
|
|
[source,js]
|
|
--------------------------------------------------
|
|
"highlight": {
|
|
"fields": {
|
|
"comment": {}
|
|
}
|
|
}
|
|
--------------------------------------------------
|
|
// NOTCONSOLE
|
|
to
|
|
[source,js]
|
|
--------------------------------------------------
|
|
"highlight": {
|
|
"fields": {
|
|
"comment": {
|
|
"matched_fields": ["comment"],
|
|
"type" : "fvh"
|
|
}
|
|
}
|
|
}
|
|
--------------------------------------------------
|
|
// NOTCONSOLE
|
|
|
|
// end::fvh[]
|