[8.14] [Search] [Playground] [Bug Fix] Model id detection fix when using search based dynamic template (#186665) (#186852)

# Backport

This will backport the following commits from `main` to `8.14`:
- [[Search] [Playground] [Bug Fix] Model id detection fix when using
search based dynamic template
(#186665)](https://github.com/elastic/kibana/pull/186665)

<!--- Backport version: 8.9.8 -->

### Questions?
Please refer to the [Backport tool
documentation](https://github.com/sqren/backport)

<!--BACKPORT [{"author":{"name":"Joe
McElroy","email":"joseph.mcelroy@elastic.co"},"sourceCommit":{"committedDate":"2024-06-24T15:17:09Z","message":"[Search]
[Playground] [Bug Fix] Model id detection fix when using search based
dynamic template (#186665)\n\n## Summary\r\n\r\nmodel_id detection
relies on creating an aggregation for the model_id\r\nfield which
requires a keyword field. This is usually added to the\r\n`.keyword`
multi-field. The issue is when using the `search` dynamic\r\ntemplate,
all text based fields create a keyword field with the
`enum`\r\nkey.\r\n\r\nThis fix iterates through all the sub fields of
the model_id field to\r\nfind a keyword based one, rather than relying
on the `.keyword`\r\nsubfield.\r\n\r\n### Checklist\r\n\r\nDelete any
items that are not applicable to this PR.\r\n\r\n- [ ] Any text added
follows [EUI's
writing\r\nguidelines](https://elastic.github.io/eui/#/guidelines/writing),
uses\r\nsentence case text and includes
[i18n\r\nsupport](https://github.com/elastic/kibana/blob/main/packages/kbn-i18n/README.md)\r\n-
[
]\r\n[Documentation](https://www.elastic.co/guide/en/kibana/master/development-documentation.html)\r\nwas
added for features that require explanation or tutorials\r\n- [x] [Unit
or
functional\r\ntests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)\r\nwere
updated or added to match the most common scenarios\r\n- [ ] [Flaky
Test\r\nRunner](https://ci-stats.kibana.dev/trigger_flaky_test_runner/1)
was\r\nused on any tests changed\r\n- [ ] Any UI touched in this PR is
usable by keyboard only (learn more\r\nabout [keyboard
accessibility](https://webaim.org/techniques/keyboard/))\r\n- [ ] Any UI
touched in this PR does not create any new axe failures\r\n(run axe in
browser:\r\n[FF](https://addons.mozilla.org/en-US/firefox/addon/axe-devtools/),\r\n[Chrome](https://chrome.google.com/webstore/detail/axe-web-accessibility-tes/lhdoppojpmngadmnindnejefpokejbdd?hl=en-US))\r\n-
[ ] If a plugin configuration key changed, check if it needs to
be\r\nallowlisted in the cloud and added to the
[docker\r\nlist](https://github.com/elastic/kibana/blob/main/src/dev/build/tasks/os_packages/docker_generator/resources/base/bin/kibana-docker)\r\n-
[ ] This renders correctly on smaller devices using a
responsive\r\nlayout. (You can test this [in
your\r\nbrowser](https://www.browserstack.com/guide/responsive-testing-on-local-server))\r\n-
[ ] This was checked for
[cross-browser\r\ncompatibility](https://www.elastic.co/support/matrix#matrix_browsers)","sha":"175b41a6fa341c4f19c7bdeb1931f27baa344bf7","branchLabelMapping":{"^v8.15.0$":"main","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["backport","release_note:skip","v8.15.0","v8.14.2"],"number":186665,"url":"https://github.com/elastic/kibana/pull/186665","mergeCommit":{"message":"[Search]
[Playground] [Bug Fix] Model id detection fix when using search based
dynamic template (#186665)\n\n## Summary\r\n\r\nmodel_id detection
relies on creating an aggregation for the model_id\r\nfield which
requires a keyword field. This is usually added to the\r\n`.keyword`
multi-field. The issue is when using the `search` dynamic\r\ntemplate,
all text based fields create a keyword field with the
`enum`\r\nkey.\r\n\r\nThis fix iterates through all the sub fields of
the model_id field to\r\nfind a keyword based one, rather than relying
on the `.keyword`\r\nsubfield.\r\n\r\n### Checklist\r\n\r\nDelete any
items that are not applicable to this PR.\r\n\r\n- [ ] Any text added
follows [EUI's
writing\r\nguidelines](https://elastic.github.io/eui/#/guidelines/writing),
uses\r\nsentence case text and includes
[i18n\r\nsupport](https://github.com/elastic/kibana/blob/main/packages/kbn-i18n/README.md)\r\n-
[
]\r\n[Documentation](https://www.elastic.co/guide/en/kibana/master/development-documentation.html)\r\nwas
added for features that require explanation or tutorials\r\n- [x] [Unit
or
functional\r\ntests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)\r\nwere
updated or added to match the most common scenarios\r\n- [ ] [Flaky
Test\r\nRunner](https://ci-stats.kibana.dev/trigger_flaky_test_runner/1)
was\r\nused on any tests changed\r\n- [ ] Any UI touched in this PR is
usable by keyboard only (learn more\r\nabout [keyboard
accessibility](https://webaim.org/techniques/keyboard/))\r\n- [ ] Any UI
touched in this PR does not create any new axe failures\r\n(run axe in
browser:\r\n[FF](https://addons.mozilla.org/en-US/firefox/addon/axe-devtools/),\r\n[Chrome](https://chrome.google.com/webstore/detail/axe-web-accessibility-tes/lhdoppojpmngadmnindnejefpokejbdd?hl=en-US))\r\n-
[ ] If a plugin configuration key changed, check if it needs to
be\r\nallowlisted in the cloud and added to the
[docker\r\nlist](https://github.com/elastic/kibana/blob/main/src/dev/build/tasks/os_packages/docker_generator/resources/base/bin/kibana-docker)\r\n-
[ ] This renders correctly on smaller devices using a
responsive\r\nlayout. (You can test this [in
your\r\nbrowser](https://www.browserstack.com/guide/responsive-testing-on-local-server))\r\n-
[ ] This was checked for
[cross-browser\r\ncompatibility](https://www.elastic.co/support/matrix#matrix_browsers)","sha":"175b41a6fa341c4f19c7bdeb1931f27baa344bf7"}},"sourceBranch":"main","suggestedTargetBranches":["8.14"],"targetPullRequestStates":[{"branch":"main","label":"v8.15.0","labelRegex":"^v8.15.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/186665","number":186665,"mergeCommit":{"message":"[Search]
[Playground] [Bug Fix] Model id detection fix when using search based
dynamic template (#186665)\n\n## Summary\r\n\r\nmodel_id detection
relies on creating an aggregation for the model_id\r\nfield which
requires a keyword field. This is usually added to the\r\n`.keyword`
multi-field. The issue is when using the `search` dynamic\r\ntemplate,
all text based fields create a keyword field with the
`enum`\r\nkey.\r\n\r\nThis fix iterates through all the sub fields of
the model_id field to\r\nfind a keyword based one, rather than relying
on the `.keyword`\r\nsubfield.\r\n\r\n### Checklist\r\n\r\nDelete any
items that are not applicable to this PR.\r\n\r\n- [ ] Any text added
follows [EUI's
writing\r\nguidelines](https://elastic.github.io/eui/#/guidelines/writing),
uses\r\nsentence case text and includes
[i18n\r\nsupport](https://github.com/elastic/kibana/blob/main/packages/kbn-i18n/README.md)\r\n-
[
]\r\n[Documentation](https://www.elastic.co/guide/en/kibana/master/development-documentation.html)\r\nwas
added for features that require explanation or tutorials\r\n- [x] [Unit
or
functional\r\ntests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)\r\nwere
updated or added to match the most common scenarios\r\n- [ ] [Flaky
Test\r\nRunner](https://ci-stats.kibana.dev/trigger_flaky_test_runner/1)
was\r\nused on any tests changed\r\n- [ ] Any UI touched in this PR is
usable by keyboard only (learn more\r\nabout [keyboard
accessibility](https://webaim.org/techniques/keyboard/))\r\n- [ ] Any UI
touched in this PR does not create any new axe failures\r\n(run axe in
browser:\r\n[FF](https://addons.mozilla.org/en-US/firefox/addon/axe-devtools/),\r\n[Chrome](https://chrome.google.com/webstore/detail/axe-web-accessibility-tes/lhdoppojpmngadmnindnejefpokejbdd?hl=en-US))\r\n-
[ ] If a plugin configuration key changed, check if it needs to
be\r\nallowlisted in the cloud and added to the
[docker\r\nlist](https://github.com/elastic/kibana/blob/main/src/dev/build/tasks/os_packages/docker_generator/resources/base/bin/kibana-docker)\r\n-
[ ] This renders correctly on smaller devices using a
responsive\r\nlayout. (You can test this [in
your\r\nbrowser](https://www.browserstack.com/guide/responsive-testing-on-local-server))\r\n-
[ ] This was checked for
[cross-browser\r\ncompatibility](https://www.elastic.co/support/matrix#matrix_browsers)","sha":"175b41a6fa341c4f19c7bdeb1931f27baa344bf7"}},{"branch":"8.14","label":"v8.14.2","labelRegex":"^v(\\d+).(\\d+).\\d+$","isSourceBranch":false,"state":"NOT_CREATED"}]}]
BACKPORT-->
This commit is contained in:
Joe McElroy 2024-06-26 13:30:50 +01:00 committed by GitHub
parent e8004f843b
commit 918446f93a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 642 additions and 15 deletions

View file

@ -1220,3 +1220,590 @@ export const DENSE_PASSAGE_FIRST_SINGLE_INDEX_DOC = {
},
},
} as SearchResponse<any>;
/**
 * Mock search responses for the model_id terms aggregation.
 *
 * The array holds a single response with no hits and one aggregation, keyed
 * `ml.inference.body_content.model_id`, whose only bucket is the model id
 * `.multilingual-e5-small_linux-x86_64`.
 */
export const DENSE_OLD_PIPELINE_DOCS = [
{
took: 1,
timed_out: false,
_shards: { total: 1, successful: 1, skipped: 0, failed: 0 },
hits: { total: { value: 1, relation: 'eq' }, max_score: null, hits: [] },
aggregations: {
// Aggregation name is the model_id field path.
'ml.inference.body_content.model_id': {
doc_count_error_upper_bound: 0,
sum_other_doc_count: 0,
buckets: [
{
key: '.multilingual-e5-small_linux-x86_64',
doc_count: 1,
},
],
},
},
} as SearchResponse<any>,
];
/**
 * Mock field-capabilities response for the single index `search-test-e5`.
 *
 * In this fixture, text fields carry `stem` / `prefix` / `joined` /
 * `delimiter` text sub-fields plus an aggregatable `enum` keyword sub-field;
 * no key ends in `.keyword`. In particular, the only aggregatable keyword
 * variant of `ml.inference.body_content.model_id` is
 * `ml.inference.body_content.model_id.enum`.
 */
export const DENSE_PIPELINE_FIELD_CAPS = {
indices: ['search-test-e5'],
fields: {
additional_urls: {
keyword: {
type: 'keyword',
metadata_field: false,
searchable: true,
aggregatable: true,
},
},
'title.stem': {
text: {
type: 'text',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
'_ingest.processors.pipeline.stem': {
text: {
type: 'text',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
'headings.delimiter': {
text: {
type: 'text',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
// model_id sub-field: text, not aggregatable.
'ml.inference.body_content.model_id.prefix': {
text: {
type: 'text',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
'headings.enum': {
keyword: {
type: 'keyword',
metadata_field: false,
searchable: true,
aggregatable: true,
},
},
'_ingest.processors': {
object: {
type: 'object',
metadata_field: false,
searchable: false,
aggregatable: false,
},
},
'_ingest.processors.types.joined': {
text: {
type: 'text',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
'body_content.enum': {
keyword: {
type: 'keyword',
metadata_field: false,
searchable: true,
aggregatable: true,
},
},
links: {
keyword: {
type: 'keyword',
metadata_field: false,
searchable: true,
aggregatable: true,
},
},
id: {
keyword: {
type: 'keyword',
metadata_field: false,
searchable: true,
aggregatable: true,
},
},
// model_id sub-field: text, not aggregatable.
'ml.inference.body_content.model_id.joined': {
text: {
type: 'text',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
ml: {
object: {
type: 'object',
metadata_field: false,
searchable: false,
aggregatable: false,
},
},
// The model_id field itself is mapped as text here, so it is not aggregatable.
'ml.inference.body_content.model_id': {
text: {
type: 'text',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
'ml.inference': {
object: {
type: 'object',
metadata_field: false,
searchable: false,
aggregatable: false,
},
},
body_content: {
text: {
type: 'text',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
'_ingest.processors.pipeline.joined': {
text: {
type: 'text',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
domains: {
keyword: {
type: 'keyword',
metadata_field: false,
searchable: true,
aggregatable: true,
},
},
'_ingest.processors.model_version.enum': {
keyword: {
type: 'keyword',
metadata_field: false,
searchable: true,
aggregatable: true,
},
},
'body_content.joined': {
text: {
type: 'text',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
url_scheme: {
keyword: {
type: 'keyword',
metadata_field: false,
searchable: true,
aggregatable: true,
},
},
meta_description: {
text: {
type: 'text',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
'ml.inference.body_content': {
object: {
type: 'object',
metadata_field: false,
searchable: false,
aggregatable: false,
},
},
headings: {
text: {
type: 'text',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
'_ingest.processors.types.enum': {
keyword: {
type: 'keyword',
metadata_field: false,
searchable: true,
aggregatable: true,
},
},
last_crawled_at: {
date: {
type: 'date',
metadata_field: false,
searchable: true,
aggregatable: true,
},
},
'_ingest.processors.model_version.joined': {
text: {
type: 'text',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
'title.delimiter': {
text: {
type: 'text',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
'headings.prefix': {
text: {
type: 'text',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
'title.enum': {
keyword: {
type: 'keyword',
metadata_field: false,
searchable: true,
aggregatable: true,
},
},
'_ingest.processors.pipeline.enum': {
keyword: {
type: 'keyword',
metadata_field: false,
searchable: true,
aggregatable: true,
},
},
'_ingest.processors.pipeline.prefix': {
text: {
type: 'text',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
'meta_description.prefix': {
text: {
type: 'text',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
'_ingest.processors.types.delimiter': {
text: {
type: 'text',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
'title.joined': {
text: {
type: 'text',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
'body_content.stem': {
text: {
type: 'text',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
'_ingest.processors.types.stem': {
text: {
type: 'text',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
// The only aggregatable keyword sub-field of model_id in this fixture.
'ml.inference.body_content.model_id.enum': {
keyword: {
type: 'keyword',
metadata_field: false,
searchable: true,
aggregatable: true,
},
},
title: {
text: {
type: 'text',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
meta_keywords: {
keyword: {
type: 'keyword',
metadata_field: false,
searchable: true,
aggregatable: true,
},
},
'_ingest.processors.processed_timestamp': {
date: {
type: 'date',
metadata_field: false,
searchable: true,
aggregatable: true,
},
},
// model_id sub-field: text, not aggregatable.
'ml.inference.body_content.model_id.delimiter': {
text: {
type: 'text',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
'meta_description.enum': {
keyword: {
type: 'keyword',
metadata_field: false,
searchable: true,
aggregatable: true,
},
},
'meta_description.delimiter': {
text: {
type: 'text',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
'title.prefix': {
text: {
type: 'text',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
'_ingest.processors.pipeline': {
text: {
type: 'text',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
_ingest: {
object: {
type: 'object',
metadata_field: false,
searchable: false,
aggregatable: false,
},
},
'ml.inference.body_content.is_truncated': {
boolean: {
type: 'boolean',
metadata_field: false,
searchable: true,
aggregatable: true,
},
},
'_ingest.processors.model_version.prefix': {
text: {
type: 'text',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
'_ingest.processors.model_version.delimiter': {
text: {
type: 'text',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
url_host: {
keyword: {
type: 'keyword',
metadata_field: false,
searchable: true,
aggregatable: true,
},
},
url_path: {
keyword: {
type: 'keyword',
metadata_field: false,
searchable: true,
aggregatable: true,
},
},
'_ingest.processors.model_version': {
text: {
type: 'text',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
url_path_dir3: {
keyword: {
type: 'keyword',
metadata_field: false,
searchable: true,
aggregatable: true,
},
},
'_ingest.processors.pipeline.delimiter': {
text: {
type: 'text',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
'headings.joined': {
text: {
type: 'text',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
'_ingest.processors.types': {
text: {
type: 'text',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
'meta_description.joined': {
text: {
type: 'text',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
// The single dense_vector field in this fixture.
'ml.inference.body_content.predicted_value': {
dense_vector: {
type: 'dense_vector',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
url: {
keyword: {
type: 'keyword',
metadata_field: false,
searchable: true,
aggregatable: true,
},
},
'meta_description.stem': {
text: {
type: 'text',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
// model_id sub-field: text, not aggregatable.
'ml.inference.body_content.model_id.stem': {
text: {
type: 'text',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
url_port: {
keyword: {
type: 'keyword',
metadata_field: false,
searchable: true,
aggregatable: true,
},
},
'body_content.delimiter': {
text: {
type: 'text',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
'_ingest.processors.model_version.stem': {
text: {
type: 'text',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
url_path_dir2: {
keyword: {
type: 'keyword',
metadata_field: false,
searchable: true,
aggregatable: true,
},
},
url_path_dir1: {
keyword: {
type: 'keyword',
metadata_field: false,
searchable: true,
aggregatable: true,
},
},
'_ingest.processors.types.prefix': {
text: {
type: 'text',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
'headings.stem': {
text: {
type: 'text',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
'body_content.prefix': {
text: {
type: 'text',
metadata_field: false,
searchable: true,
aggregatable: false,
},
},
},
};

View file

@ -19,6 +19,8 @@ import {
SPARSE_INPUT_OUTPUT_ONE_INDEX,
SPARSE_INPUT_OUTPUT_ONE_INDEX_FIELD_CAPS,
SPARSE_INPUT_OUTPUT_ONE_INDEX_FIELD_CAPS_MODEL_ID_KEYWORD,
DENSE_PIPELINE_FIELD_CAPS,
DENSE_OLD_PIPELINE_DOCS,
} from '../../__mocks__/fetch_query_source_fields.mock';
import {
fetchFields,
@ -256,6 +258,54 @@ describe('fetch_query_source_fields', () => {
},
});
});
// Covers an index whose model_id field has no `.keyword` sub-field: the
// terms aggregation is expected to target the `.enum` keyword sub-field.
it('should perform a search request with the correct modelid for old style inference', async () => {
  const indexName = 'search-test-e5';

  // Stubbed Elasticsearch calls, kept as named mocks so they can be asserted on directly.
  const fieldCapsMock = jest.fn().mockResolvedValue(DENSE_PIPELINE_FIELD_CAPS);
  const searchMock = jest.fn().mockResolvedValue(DENSE_OLD_PIPELINE_DOCS[0]);
  const getMappingMock = jest.fn().mockResolvedValue({
    [indexName]: {
      mappings: {},
    },
  });

  const esClient = {
    asCurrentUser: {
      fieldCaps: fieldCapsMock,
      search: searchMock,
      indices: {
        getMapping: getMappingMock,
      },
    },
  } as any;

  const result = await fetchFields(esClient, [indexName]);

  // The aggregation is named after the model_id path but runs on the `.enum` sub-field.
  const expectedSearchRequest = {
    index: indexName,
    body: {
      size: 0,
      aggs: {
        'ml.inference.body_content.model_id': {
          terms: {
            field: 'ml.inference.body_content.model_id.enum',
            size: 1,
          },
        },
      },
    },
  };
  expect(searchMock).toHaveBeenCalledWith(expectedSearchRequest);

  const expectedDenseField = {
    field: 'ml.inference.body_content.predicted_value',
    indices: [indexName],
    model_id: '.multilingual-e5-small_linux-x86_64',
    nested: false,
  };
  expect(result).toEqual({
    [indexName]: {
      bm25_query_fields: expect.any(Array),
      dense_vector_query_fields: [expectedDenseField],
      elser_query_fields: [],
      source_fields: expect.any(Array),
      skipped_fields: 30,
    },
  });
});
});
describe('getModelIdFields', () => {

View file

@ -22,25 +22,15 @@ interface IndexFieldModel {
export const getModelIdFields = (fieldCapsResponse: FieldCapsResponse) => {
const { fields } = fieldCapsResponse;
return Object.keys(fields).reduce<Array<{ path: string; aggField: string }>>((acc, fieldKey) => {
const field = fields[fieldKey];
if (fieldKey.endsWith('model_id')) {
if ('keyword' in field && field.keyword.aggregatable) {
acc.push({
path: fieldKey,
aggField: fieldKey,
});
return acc;
}
const keywordModelIdField = fields[fieldKey + '.keyword'];
const multiField = Object.keys(fields)
.filter((key) => key.startsWith(fieldKey))
.find((key) => fields[key].keyword && fields[key].keyword.aggregatable);
if (
keywordModelIdField &&
`keyword` in keywordModelIdField &&
keywordModelIdField.keyword.aggregatable
) {
if (multiField) {
acc.push({
path: fieldKey,
aggField: fieldKey + '.keyword',
aggField: multiField,
});
return acc;
}