[Code] Escape highlight extracting regex and use term ES query for document search (#39473) (#39527)

This commit is contained in:
Mengwei Ding 2019-06-24 12:56:22 -07:00 committed by GitHub
parent 5b50782e10
commit 9d78bed4f9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 36 additions and 19 deletions

View file

@ -42,7 +42,7 @@ const mockSearchResults = [
},
highlight: {
content: [
'declare namespace Express {\n interface Request extends Flash {}\n}\n\ninterface Flash {\n flash(type: _@_string_@_',
'declare namespace Express {\n interface Request extends Flash {}\n}\n\ninterface Flash {\n flash(type: _@-string-@_',
],
},
},

View file

@ -4,6 +4,7 @@
* you may not use this file except in compliance with the Elastic License.
*/
import _ from 'lodash';
import { IRange } from 'monaco-editor';
import { LineMapper } from '../../common/line_mapper';
import {
@ -30,7 +31,8 @@ const HIT_MERGE_LINE_INTERVAL = 2; // Inclusive
const MAX_HIT_NUMBER = 5;
export class DocumentSearchClient extends AbstractSearchClient {
private HIGHLIGHT_TAG = '_@_';
private HIGHLIGHT_PRE_TAG = '_@-';
private HIGHLIGHT_POST_TAG = '-@_';
private LINE_SEPARATOR = '\n';
constructor(protected readonly client: EsClient, protected readonly log: Logger) {
@ -63,15 +65,21 @@ export class DocumentSearchClient extends AbstractSearchClient {
},
};
// The query to search content and path filter.
const contentAndPathQuery = {
simple_query_string: {
query: req.query,
fields: ['content^1.0', 'path^1.0'],
default_operator: 'or',
lenient: false,
analyze_wildcard: false,
boost: 1.0,
// The queries that match the search string against the `content` and `path` fields.
const contentQuery = {
term: {
content: {
value: req.query,
boost: 1.0,
},
},
};
const pathQuery = {
term: {
path: {
value: req.query,
boost: 1.0,
},
},
};
@ -110,7 +118,7 @@ export class DocumentSearchClient extends AbstractSearchClient {
size,
query: {
bool: {
should: [qnameQuery, contentAndPathQuery],
should: [qnameQuery, contentQuery, pathQuery],
disable_coord: false,
adjust_pure_negative: true,
boost: 1.0,
@ -179,8 +187,8 @@ export class DocumentSearchClient extends AbstractSearchClient {
},
highlight: {
// TODO: we might need to improve the highlighting separator.
pre_tags: [this.HIGHLIGHT_TAG],
post_tags: [this.HIGHLIGHT_TAG],
pre_tags: [this.HIGHLIGHT_PRE_TAG],
post_tags: [this.HIGHLIGHT_POST_TAG],
fields: {
content: {},
path: {},
@ -344,13 +352,16 @@ export class DocumentSearchClient extends AbstractSearchClient {
}
private termsToHits(source: string, terms: string[]): SourceHit[] {
const filteredTerms = terms.filter(t => t.trim().length > 0);
if (filteredTerms.length === 0) {
// Escape regex metacharacters in each term and deduplicate the terms with a Set.
const filteredTerms = new Set(
terms.filter(t => t.trim().length > 0).map(t => _.escapeRegExp(t))
);
if (filteredTerms.size === 0) {
return [];
}
const lineMapper = new LineMapper(source);
const regex = new RegExp(`(${filteredTerms.join('|')})`, 'g');
const regex = new RegExp(`(${Array.from(filteredTerms.values()).join('|')})`, 'g');
let match;
const hits: SourceHit[] = [];
do {
@ -379,11 +390,17 @@ export class DocumentSearchClient extends AbstractSearchClient {
if (!text) {
return [];
} else {
const keywordRegex = new RegExp(`${this.HIGHLIGHT_TAG}(\\w*)${this.HIGHLIGHT_TAG}`, 'g');
// console.log(text);
const keywordRegex = new RegExp(
`${this.HIGHLIGHT_PRE_TAG}(\\w*)${this.HIGHLIGHT_POST_TAG}`,
'g'
);
const keywords = text.match(keywordRegex);
if (keywords) {
return keywords.map((k: string) => {
return k.replace(new RegExp(this.HIGHLIGHT_TAG, 'g'), '');
return k
.replace(new RegExp(this.HIGHLIGHT_PRE_TAG, 'g'), '')
.replace(new RegExp(this.HIGHLIGHT_POST_TAG, 'g'), '');
});
} else {
return [];