Add ability to set "max_analyzed_offset" implicitly to "index.highlight (#118895)

Add ability to set "max_analyzed_offet" implicitly to "index.highlight
.max_analyzed_offset", by setting it excplicitly to "-1".

Closes #112822
This commit is contained in:
Svilen Mihaylov 2025-01-07 11:19:07 -05:00 committed by GitHub
parent edfe2c5c6d
commit 93c349cc76
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 182 additions and 29 deletions

View file

@ -17,6 +17,7 @@ import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.Ann
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.lucene.search.uhighlight.CustomUnifiedHighlighter;
import org.elasticsearch.lucene.search.uhighlight.QueryMaxAnalyzedOffset;
import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
import org.elasticsearch.search.fetch.subphase.highlight.DefaultHighlighter;
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext;
@ -52,7 +53,7 @@ public class AnnotatedTextHighlighter extends DefaultHighlighter {
}
@Override
protected Analyzer wrapAnalyzer(Analyzer analyzer, Integer maxAnalyzedOffset) {
protected Analyzer wrapAnalyzer(Analyzer analyzer, QueryMaxAnalyzedOffset maxAnalyzedOffset) {
return new AnnotatedHighlighterAnalyzer(super.wrapAnalyzer(analyzer, maxAnalyzedOffset));
}

View file

@ -39,6 +39,7 @@ import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.Ann
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotationAnalyzerWrapper;
import org.elasticsearch.lucene.search.uhighlight.CustomUnifiedHighlighter;
import org.elasticsearch.lucene.search.uhighlight.QueryMaxAnalyzedOffset;
import org.elasticsearch.lucene.search.uhighlight.Snippet;
import org.elasticsearch.search.fetch.subphase.highlight.LimitTokenOffsetAnalyzer;
import org.elasticsearch.test.ESTestCase;
@ -85,7 +86,7 @@ public class AnnotatedTextHighlighterTests extends ESTestCase {
int noMatchSize,
String[] expectedPassages,
int maxAnalyzedOffset,
Integer queryMaxAnalyzedOffset
Integer queryMaxAnalyzedOffsetIn
) throws Exception {
try (Directory dir = newDirectory()) {
@ -116,8 +117,9 @@ public class AnnotatedTextHighlighterTests extends ESTestCase {
for (int i = 0; i < markedUpInputs.length; i++) {
annotations[i] = AnnotatedText.parse(markedUpInputs[i]);
}
QueryMaxAnalyzedOffset queryMaxAnalyzedOffset = QueryMaxAnalyzedOffset.create(queryMaxAnalyzedOffsetIn, maxAnalyzedOffset);
if (queryMaxAnalyzedOffset != null) {
wrapperAnalyzer = new LimitTokenOffsetAnalyzer(wrapperAnalyzer, queryMaxAnalyzedOffset);
wrapperAnalyzer = new LimitTokenOffsetAnalyzer(wrapperAnalyzer, queryMaxAnalyzedOffset.getNotNull());
}
AnnotatedHighlighterAnalyzer hiliteAnalyzer = new AnnotatedHighlighterAnalyzer(wrapperAnalyzer);
hiliteAnalyzer.setAnnotations(annotations);
@ -311,6 +313,19 @@ public class AnnotatedTextHighlighterTests extends ESTestCase {
e.getMessage()
);
// Same as before, but force using index maxOffset (20) as queryMaxOffset by passing -1.
assertHighlightOneDoc(
"text",
new String[] { "[Long Text exceeds](Long+Text+exceeds) MAX analyzed offset)" },
query,
Locale.ROOT,
breakIterator,
0,
new String[] { "Long Text [exceeds](_hit_term=exceeds) MAX analyzed offset)" },
20,
-1
);
assertHighlightOneDoc(
"text",
new String[] { "[Long Text Exceeds](Long+Text+Exceeds) MAX analyzed offset [Long Text Exceeds](Long+Text+Exceeds)" },