[Search] [Playground] [Bug Fix] Model id detection fix when using search based dynamic template (#186665)

## Summary

Model ID detection runs a terms aggregation on the `model_id` field, which
requires an aggregatable keyword field. Usually that keyword field is the
`.keyword` multi-field. However, when an index uses the `search` dynamic
template, every text-based field gets its keyword sub-field under the `enum`
key instead, so the `.keyword` lookup finds nothing.

This fix scans all sub-fields of the `model_id` field for an aggregatable
keyword field, rather than relying solely on the `.keyword` sub-field.

### Checklist

Delete any items that are not applicable to this PR.

- [ ] Any text added follows [EUI's writing
guidelines](https://elastic.github.io/eui/#/guidelines/writing), uses
sentence case text and includes [i18n
support](https://github.com/elastic/kibana/blob/main/packages/kbn-i18n/README.md)
- [ ]
[Documentation](https://www.elastic.co/guide/en/kibana/master/development-documentation.html)
was added for features that require explanation or tutorials
- [x] [Unit or functional
tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)
were updated or added to match the most common scenarios
- [ ] [Flaky Test
Runner](https://ci-stats.kibana.dev/trigger_flaky_test_runner/1) was
used on any tests changed
- [ ] Any UI touched in this PR is usable by keyboard only (learn more
about [keyboard accessibility](https://webaim.org/techniques/keyboard/))
- [ ] Any UI touched in this PR does not create any new axe failures
(run axe in browser:
[FF](https://addons.mozilla.org/en-US/firefox/addon/axe-devtools/),
[Chrome](https://chrome.google.com/webstore/detail/axe-web-accessibility-tes/lhdoppojpmngadmnindnejefpokejbdd?hl=en-US))
- [ ] If a plugin configuration key changed, check if it needs to be
allowlisted in the cloud and added to the [docker
list](https://github.com/elastic/kibana/blob/main/src/dev/build/tasks/os_packages/docker_generator/resources/base/bin/kibana-docker)
- [ ] This renders correctly on smaller devices using a responsive
layout. (You can test this [in your
browser](https://www.browserstack.com/guide/responsive-testing-on-local-server))
- [ ] This was checked for [cross-browser
compatibility](https://www.elastic.co/support/matrix#matrix_browsers)
This commit is contained in:
Joe McElroy 2024-06-24 16:17:09 +01:00 committed by GitHub
parent be11a5060a
commit 175b41a6fa
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 642 additions and 15 deletions

View file

@ -206,6 +206,27 @@ export const DENSE_SPARSE_SAME_FIELD_NAME_CAPS = {
},
};
/**
 * Mocked search responses for the model_id terms aggregation on
 * `ml.inference.body_content.model_id`.
 *
 * Holds a single response with no hits and one aggregation bucket whose key is
 * the model id `.multilingual-e5-small_linux-x86_64`.
 */
export const DENSE_OLD_PIPELINE_DOCS = [
  {
    took: 1,
    timed_out: false,
    _shards: {
      total: 1,
      successful: 1,
      skipped: 0,
      failed: 0,
    },
    hits: {
      total: { value: 1, relation: 'eq' },
      max_score: null,
      hits: [],
    },
    aggregations: {
      // The aggregation is keyed by the model_id field path.
      'ml.inference.body_content.model_id': {
        doc_count_error_upper_bound: 0,
        sum_other_doc_count: 0,
        buckets: [{ key: '.multilingual-e5-small_linux-x86_64', doc_count: 1 }],
      },
    },
  } as SearchResponse<any>,
];
export const DENSE_SPARSE_SAME_FIELD_NAME_DOCS = [
{
took: 1,
@ -959,6 +980,572 @@ export const ELSER_PASSAGE_CHUNKED_TWO_INDICES = {
},
};
/**
 * Field capabilities mock for the `search-test-e5` index.
 *
 * In this fixture `ml.inference.body_content.model_id` is a `text` field whose
 * only aggregatable keyword sub-field is `.enum`; there is no `.keyword`
 * sub-field. `ml.inference.body_content.predicted_value` is a `dense_vector`.
 *
 * Each entry has the shape `{ [type]: { type, metadata_field, searchable, aggregatable } }`.
 */
export const DENSE_PIPELINE_FIELD_CAPS = {
  indices: ['search-test-e5'],
  fields: (() => {
    // Within this fixture the searchable/aggregatable flags depend only on the field type.
    const flagsByType = {
      text: { searchable: true, aggregatable: false },
      keyword: { searchable: true, aggregatable: true },
      object: { searchable: false, aggregatable: false },
      date: { searchable: true, aggregatable: true },
      boolean: { searchable: true, aggregatable: true },
      dense_vector: { searchable: true, aggregatable: false },
    };

    // Field name -> field type. Order matters: it becomes the key order of `fields`.
    const typeByField: Array<[string, keyof typeof flagsByType]> = [
      ['additional_urls', 'keyword'],
      ['title.stem', 'text'],
      ['_ingest.processors.pipeline.stem', 'text'],
      ['headings.delimiter', 'text'],
      ['ml.inference.body_content.model_id.prefix', 'text'],
      ['headings.enum', 'keyword'],
      ['_ingest.processors', 'object'],
      ['_ingest.processors.types.joined', 'text'],
      ['body_content.enum', 'keyword'],
      ['links', 'keyword'],
      ['id', 'keyword'],
      ['ml.inference.body_content.model_id.joined', 'text'],
      ['ml', 'object'],
      ['ml.inference.body_content.model_id', 'text'],
      ['ml.inference', 'object'],
      ['body_content', 'text'],
      ['_ingest.processors.pipeline.joined', 'text'],
      ['domains', 'keyword'],
      ['_ingest.processors.model_version.enum', 'keyword'],
      ['body_content.joined', 'text'],
      ['url_scheme', 'keyword'],
      ['meta_description', 'text'],
      ['ml.inference.body_content', 'object'],
      ['headings', 'text'],
      ['_ingest.processors.types.enum', 'keyword'],
      ['last_crawled_at', 'date'],
      ['_ingest.processors.model_version.joined', 'text'],
      ['title.delimiter', 'text'],
      ['headings.prefix', 'text'],
      ['title.enum', 'keyword'],
      ['_ingest.processors.pipeline.enum', 'keyword'],
      ['_ingest.processors.pipeline.prefix', 'text'],
      ['meta_description.prefix', 'text'],
      ['_ingest.processors.types.delimiter', 'text'],
      ['title.joined', 'text'],
      ['body_content.stem', 'text'],
      ['_ingest.processors.types.stem', 'text'],
      ['ml.inference.body_content.model_id.enum', 'keyword'],
      ['title', 'text'],
      ['meta_keywords', 'keyword'],
      ['_ingest.processors.processed_timestamp', 'date'],
      ['ml.inference.body_content.model_id.delimiter', 'text'],
      ['meta_description.enum', 'keyword'],
      ['meta_description.delimiter', 'text'],
      ['title.prefix', 'text'],
      ['_ingest.processors.pipeline', 'text'],
      ['_ingest', 'object'],
      ['ml.inference.body_content.is_truncated', 'boolean'],
      ['_ingest.processors.model_version.prefix', 'text'],
      ['_ingest.processors.model_version.delimiter', 'text'],
      ['url_host', 'keyword'],
      ['url_path', 'keyword'],
      ['_ingest.processors.model_version', 'text'],
      ['url_path_dir3', 'keyword'],
      ['_ingest.processors.pipeline.delimiter', 'text'],
      ['headings.joined', 'text'],
      ['_ingest.processors.types', 'text'],
      ['meta_description.joined', 'text'],
      ['ml.inference.body_content.predicted_value', 'dense_vector'],
      ['url', 'keyword'],
      ['meta_description.stem', 'text'],
      ['ml.inference.body_content.model_id.stem', 'text'],
      ['url_port', 'keyword'],
      ['body_content.delimiter', 'text'],
      ['_ingest.processors.model_version.stem', 'text'],
      ['url_path_dir2', 'keyword'],
      ['url_path_dir1', 'keyword'],
      ['_ingest.processors.types.prefix', 'text'],
      ['headings.stem', 'text'],
      ['body_content.prefix', 'text'],
    ];

    const caps: Record<
      string,
      Record<
        string,
        { type: string; metadata_field: boolean; searchable: boolean; aggregatable: boolean }
      >
    > = {};
    for (const [fieldName, fieldType] of typeByField) {
      const { searchable, aggregatable } = flagsByType[fieldType];
      caps[fieldName] = {
        [fieldType]: { type: fieldType, metadata_field: false, searchable, aggregatable },
      };
    }
    return caps;
  })(),
};
export const ELSER_PASSAGE_CHUNKED = {
indices: ['search-nethys'],
fields: {

View file

@ -26,6 +26,8 @@ import {
DENSE_SEMANTIC_FIELD_MAPPINGS,
DENSE_SEMANTIC_FIELD_FIELD_CAPS,
DENSE_SEMANTIC_FIELD_MAPPINGS_MISSING_TASK_TYPE,
DENSE_PIPELINE_FIELD_CAPS,
DENSE_OLD_PIPELINE_DOCS,
} from '../../__mocks__/fetch_query_source_fields.mock';
import {
fetchFields,
@ -506,6 +508,54 @@ describe('fetch_query_source_fields', () => {
});
});
it('should perform a search request with the correct modelid for old style inference', async () => {
  // The field caps mock exposes the model_id keyword sub-field as `.enum`
  // (there is no `.keyword`), so the aggregation must target `.enum`.
  const fieldCapsMock = jest.fn().mockResolvedValue(DENSE_PIPELINE_FIELD_CAPS);
  const searchMock = jest.fn().mockResolvedValue(DENSE_OLD_PIPELINE_DOCS[0]);
  const getMappingMock = jest.fn().mockResolvedValue({
    'search-test-e5': { mappings: {} },
  });
  const esClient = {
    asCurrentUser: {
      fieldCaps: fieldCapsMock,
      search: searchMock,
      indices: { getMapping: getMappingMock },
    },
  } as any;

  const result = await fetchFields(esClient, ['search-test-e5']);

  // The terms aggregation is named after the model_id path but runs on the `.enum` sub-field.
  const expectedSearchRequest = {
    index: 'search-test-e5',
    body: {
      size: 0,
      aggs: {
        'ml.inference.body_content.model_id': {
          terms: { field: 'ml.inference.body_content.model_id.enum', size: 1 },
        },
      },
    },
  };
  expect(esClient.asCurrentUser.search).toHaveBeenCalledWith(expectedSearchRequest);

  // The model id from the aggregation bucket is attached to the dense vector field.
  const expectedDenseVectorField = {
    field: 'ml.inference.body_content.predicted_value',
    indices: ['search-test-e5'],
    model_id: '.multilingual-e5-small_linux-x86_64',
  };
  expect(result).toEqual({
    'search-test-e5': {
      bm25_query_fields: expect.any(Array),
      dense_vector_query_fields: [expectedDenseVectorField],
      elser_query_fields: [],
      semantic_fields: [],
      source_fields: expect.any(Array),
      skipped_fields: 30,
    },
  });
});
it('should perform a search request with the correct parameters with top level model id', async () => {
const client = {
asCurrentUser: {

View file

@ -53,25 +53,15 @@ const EMBEDDING_TYPE: Record<TaskType, SemanticEmbeddingType> = {
export const getModelIdFields = (fieldCapsResponse: FieldCapsResponse) => {
const { fields } = fieldCapsResponse;
return Object.keys(fields).reduce<Array<{ path: string; aggField: string }>>((acc, fieldKey) => {
const field = fields[fieldKey];
if (fieldKey.endsWith('model_id')) {
if ('keyword' in field && field.keyword.aggregatable) {
acc.push({
path: fieldKey,
aggField: fieldKey,
});
return acc;
}
const keywordModelIdField = fields[fieldKey + '.keyword'];
const multiField = Object.keys(fields)
.filter((key) => key.startsWith(fieldKey))
.find((key) => fields[key].keyword && fields[key].keyword.aggregatable);
if (
keywordModelIdField &&
`keyword` in keywordModelIdField &&
keywordModelIdField.keyword.aggregatable
) {
if (multiField) {
acc.push({
path: fieldKey,
aggField: fieldKey + '.keyword',
aggField: multiField,
});
return acc;
}