[Search] [Playground] Retrievers playground support (#180780)

Switches the Playground's generated Elasticsearch request from a top-level `query` to a `retriever`.
This also uses the RRF (reciprocal rank fusion) support in retrievers to combine sparse (ELSER), dense vector, and BM25 matches.

# ELSER only

![image](03acf71f-1293-460e-ad57-316c6924b7bc)

# ELSER + bm25 hybrid

![image](48283b47-1b5b-45fa-97c7-59321c600f3c)

---------

Co-authored-by: kibanamachine <42973632+kibanamachine@users.noreply.github.com>
This commit is contained in:
Joe McElroy 2024-04-16 15:11:58 +01:00 committed by GitHub
parent 63bb927473
commit 58c0059779
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 516 additions and 242 deletions

View file

@ -11,6 +11,7 @@ interface ModelFields {
field: string;
model_id: string;
nested: boolean;
indices: string[];
}
export interface QuerySourceFields {

View file

@ -7,7 +7,7 @@
import {
EuiAccordion,
EuiBasicTable,
EuiSelectable,
EuiButton,
EuiButtonEmpty,
EuiCodeBlock,
@ -19,11 +19,10 @@ import {
EuiFlyoutHeader,
EuiPanel,
EuiSpacer,
EuiSwitch,
EuiSelectableOption,
EuiText,
EuiTitle,
} from '@elastic/eui';
import { i18n } from '@kbn/i18n';
import { FormattedMessage } from '@kbn/i18n-react';
import React, { useEffect, useMemo, useState } from 'react';
import { useController, useFormContext } from 'react-hook-form';
@ -59,18 +58,14 @@ export const ViewQueryFlyout: React.FC<ViewQueryFlyoutProps> = ({ onClose }) =>
return queryFields[index].includes(field);
};
const toggleQueryField = (index: string, field: string) => {
if (isQueryFieldSelected(index, field)) {
setQueryFields({
...queryFields,
[index]: queryFields[index].filter((x: string) => x !== field),
});
} else {
setQueryFields({
...queryFields,
[index]: [...queryFields[index], field],
});
}
// Replaces the selected query fields of `index` with the labels of every
// option that is currently checked ('on'); other indices are left untouched.
const updateFields = (index: string, options: EuiSelectableOption[]) => {
  const selectedLabels = [];
  for (const option of options) {
    if (option.checked === 'on') {
      selectedLabels.push(option.label);
    }
  }
  setQueryFields({ ...queryFields, [index]: selectedLabels });
};
const saveQuery = () => {
@ -118,11 +113,12 @@ export const ViewQueryFlyout: React.FC<ViewQueryFlyoutProps> = ({ onClose }) =>
/>
</h5>
</EuiText>
{Object.entries(fields).map(([index, group]) => (
{Object.entries(fields).map(([index, group], i) => (
<EuiFlexItem grow={false} key={index}>
<EuiPanel grow={false} hasShadow={false} hasBorder>
<EuiAccordion
id={index}
initialIsOpen={i === 0}
buttonContent={
<EuiText>
<h5>{index}</h5>
@ -130,48 +126,26 @@ export const ViewQueryFlyout: React.FC<ViewQueryFlyoutProps> = ({ onClose }) =>
}
>
<EuiSpacer size="s" />
<EuiBasicTable
items={[
<EuiSelectable
aria-label="Select query fields"
options={[
...group.elser_query_fields,
...group.dense_vector_query_fields,
...group.bm25_query_fields,
].map((field) => ({
field: typeof field === 'string' ? field : field.field,
label: typeof field === 'string' ? field : field.field,
checked: isQueryFieldSelected(
index,
typeof field === 'string' ? field : field.field
)
? 'on'
: undefined,
}))}
columns={[
{
field: 'field',
name: i18n.translate(
'xpack.searchPlayground.viewQuery.flyout.table.field',
{ defaultMessage: 'Field' }
),
truncateText: false,
render: (field: string) => field,
},
{
actions: [
{
name: 'toggle',
description: i18n.translate(
'xpack.searchPlayground.viewQuery.flyout.table.toggle',
{ defaultMessage: 'Toggle field' }
),
isPrimary: true,
render: ({ field }: { field: string }) => (
<EuiSwitch
showLabel={false}
label="toggle"
checked={isQueryFieldSelected(index, field)}
onChange={(e) => toggleQueryField(index, field)}
compressed
/>
),
},
],
},
]}
hasActions
/>
onChange={(newOptions) => updateFields(index, newOptions)}
listProps={{ bordered: false }}
>
{(list) => list}
</EuiSelectable>
</EuiAccordion>
</EuiPanel>
</EuiFlexItem>

View file

@ -10,14 +10,16 @@ import { createQuery, getDefaultQueryFields, getDefaultSourceFields } from './cr
describe('create_query', () => {
describe('createQuery', () => {
it('should return a query', () => {
it('should return a sparse single query', () => {
const fields = {
index1: ['field1'],
};
const fieldDescriptors: IndicesQuerySourceFields = {
index1: {
elser_query_fields: [{ field: 'field1', model_id: 'model1', nested: false }],
elser_query_fields: [
{ field: 'field1', model_id: 'model1', nested: false, indices: ['index1'] },
],
dense_vector_query_fields: [],
bm25_query_fields: [],
source_fields: [],
@ -25,19 +27,52 @@ describe('create_query', () => {
};
expect(createQuery(fields, fieldDescriptors)).toEqual({
query: {
bool: {
should: [
{
text_expansion: {
field1: {
retriever: {
standard: {
query: {
text_expansion: {
field1: {
model_id: 'model1',
model_text: '{query}',
},
},
},
},
},
});
});
it('should return a knn query single', () => {
const fields = {
index1: ['field1'],
};
const fieldDescriptors: IndicesQuerySourceFields = {
index1: {
elser_query_fields: [],
dense_vector_query_fields: [
{ field: 'field1', model_id: 'model1', nested: false, indices: ['index1'] },
],
bm25_query_fields: [],
source_fields: [],
},
};
expect(createQuery(fields, fieldDescriptors)).toEqual({
retriever: {
standard: {
query: {
knn: {
field: 'field1',
num_candidates: 100,
query_vector_builder: {
text_embedding: {
model_id: 'model1',
model_text: '{query}',
},
},
},
],
minimum_should_match: 1,
},
},
},
});
@ -51,13 +86,17 @@ describe('create_query', () => {
const fieldDescriptors: IndicesQuerySourceFields = {
index1: {
elser_query_fields: [{ field: 'field1', model_id: 'model1', nested: false }],
elser_query_fields: [
{ field: 'field1', model_id: 'model1', nested: false, indices: ['index1', 'index2'] },
],
dense_vector_query_fields: [],
bm25_query_fields: [],
source_fields: [],
},
index2: {
elser_query_fields: [{ field: 'field1', model_id: 'model1', nested: false }],
elser_query_fields: [
{ field: 'field1', model_id: 'model1', nested: false, indices: ['index1', 'index2'] },
],
dense_vector_query_fields: [],
bm25_query_fields: [],
source_fields: [],
@ -65,19 +104,16 @@ describe('create_query', () => {
};
expect(createQuery(fields, fieldDescriptors)).toEqual({
query: {
bool: {
should: [
{
text_expansion: {
field1: {
model_id: 'model1',
model_text: '{query}',
},
retriever: {
standard: {
query: {
text_expansion: {
field1: {
model_id: 'model1',
model_text: '{query}',
},
},
],
minimum_should_match: 1,
},
},
},
});
@ -91,13 +127,17 @@ describe('create_query', () => {
const fieldDescriptors: IndicesQuerySourceFields = {
index1: {
elser_query_fields: [{ field: 'field1', model_id: 'model1', nested: false }],
elser_query_fields: [
{ field: 'field1', model_id: 'model1', nested: false, indices: ['index1'] },
],
dense_vector_query_fields: [],
bm25_query_fields: [],
source_fields: [],
},
index2: {
elser_query_fields: [{ field: 'field2', model_id: 'model1', nested: false }],
elser_query_fields: [
{ field: 'field2', model_id: 'model1', nested: false, indices: ['index2'] },
],
dense_vector_query_fields: [],
bm25_query_fields: [],
source_fields: [],
@ -105,33 +145,40 @@ describe('create_query', () => {
};
expect(createQuery(fields, fieldDescriptors)).toEqual({
query: {
bool: {
should: [
retriever: {
rrf: {
retrievers: [
{
text_expansion: {
field1: {
model_id: 'model1',
model_text: '{query}',
standard: {
query: {
text_expansion: {
field1: {
model_id: 'model1',
model_text: '{query}',
},
},
},
},
},
{
text_expansion: {
field2: {
model_id: 'model1',
model_text: '{query}',
standard: {
query: {
text_expansion: {
field2: {
model_id: 'model1',
model_text: '{query}',
},
},
},
},
},
],
minimum_should_match: 1,
},
},
});
});
it('should throw for nested dense query', () => {
it('should return empty for nested dense query', () => {
const fields = {
index1: ['passages.field1.predicted_value'],
};
@ -140,17 +187,30 @@ describe('create_query', () => {
index1: {
elser_query_fields: [],
dense_vector_query_fields: [
{ field: 'passages.field1.predicted_value', model_id: 'model1', nested: true },
{
field: 'passages.field1.predicted_value',
model_id: 'model1',
nested: true,
indices: ['index1'],
},
],
bm25_query_fields: [],
source_fields: [],
},
};
expect(createQuery(fields, fieldDescriptors)).toEqual({});
expect(createQuery(fields, fieldDescriptors)).toEqual({
retriever: {
standard: {
query: {
match_all: {},
},
},
},
});
});
it('should throw for nested sparse query', () => {
it('should return empty for nested sparse query', () => {
const fields = {
index1: ['passages.field1.tokens'],
};
@ -158,7 +218,12 @@ describe('create_query', () => {
const fieldDescriptors: IndicesQuerySourceFields = {
index1: {
elser_query_fields: [
{ field: 'passages.field1.tokens', model_id: 'model1', nested: true },
{
field: 'passages.field1.tokens',
model_id: 'model1',
nested: true,
indices: ['index1'],
},
],
dense_vector_query_fields: [],
bm25_query_fields: [],
@ -166,63 +231,152 @@ describe('create_query', () => {
},
};
expect(createQuery(fields, fieldDescriptors)).toEqual({});
});
it('should return a hybrid query', () => {
const fields = {
index1: ['field1', 'content', 'title'],
index2: ['field2'],
};
const fieldDescriptors: IndicesQuerySourceFields = {
index1: {
elser_query_fields: [{ field: 'field1', model_id: 'model1', nested: false }],
dense_vector_query_fields: [],
bm25_query_fields: ['content', 'title'],
source_fields: [],
},
index2: {
elser_query_fields: [{ field: 'field2', model_id: 'model1', nested: false }],
dense_vector_query_fields: [],
bm25_query_fields: [],
source_fields: [],
},
};
expect(createQuery(fields, fieldDescriptors)).toEqual({
query: {
bool: {
should: [
{
text_expansion: {
field1: {
model_id: 'model1',
model_text: '{query}',
},
},
},
{
multi_match: {
query: '{query}',
fields: ['content', 'title'],
},
},
{
text_expansion: {
field2: {
model_id: 'model1',
model_text: '{query}',
},
},
},
],
minimum_should_match: 1,
retriever: {
standard: {
query: {
match_all: {},
},
},
},
});
});
describe('hybrid without RRF', () => {
it('should return a hybrid query', () => {
const fields = {
index1: ['field1', 'content', 'title'],
index2: ['field2'],
};
const fieldDescriptors: IndicesQuerySourceFields = {
index1: {
elser_query_fields: [
{ field: 'field1', model_id: 'model1', nested: false, indices: ['index1'] },
],
dense_vector_query_fields: [],
bm25_query_fields: ['content', 'title'],
source_fields: [],
},
index2: {
elser_query_fields: [
{ field: 'field2', model_id: 'model1', nested: false, indices: ['index2'] },
],
dense_vector_query_fields: [],
bm25_query_fields: [],
source_fields: [],
},
};
expect(createQuery(fields, fieldDescriptors, { rrf: false })).toEqual({
retriever: {
standard: {
query: {
bool: {
should: [
{
text_expansion: {
field1: {
model_id: 'model1',
model_text: '{query}',
},
},
},
{
multi_match: {
query: '{query}',
fields: ['content', 'title'],
},
},
{
text_expansion: {
field2: {
model_id: 'model1',
model_text: '{query}',
},
},
},
],
minimum_should_match: 1,
},
},
},
},
});
});
});
describe('hybrid with RRF', () => {
it('should return a hybrid query', () => {
const fields = {
index1: ['field1', 'content', 'title'],
index2: ['field2'],
};
const fieldDescriptors: IndicesQuerySourceFields = {
index1: {
elser_query_fields: [
{ field: 'field1', model_id: 'model1', nested: false, indices: ['index1'] },
],
dense_vector_query_fields: [],
bm25_query_fields: ['content', 'title'],
source_fields: [],
},
index2: {
elser_query_fields: [
{ field: 'field2', model_id: 'model1', nested: false, indices: ['index2'] },
],
dense_vector_query_fields: [],
bm25_query_fields: [],
source_fields: [],
},
};
expect(createQuery(fields, fieldDescriptors)).toEqual({
retriever: {
rrf: {
retrievers: [
{
standard: {
query: {
text_expansion: {
field1: {
model_id: 'model1',
model_text: '{query}',
},
},
},
},
},
{
standard: {
query: {
multi_match: {
query: '{query}',
fields: ['content', 'title'],
},
},
},
},
{
standard: {
query: {
text_expansion: {
field2: {
model_id: 'model1',
model_text: '{query}',
},
},
},
},
},
],
},
},
});
});
});
it('dense vector only', () => {
const fields = {
index1: ['field1'],
@ -231,12 +385,16 @@ describe('create_query', () => {
const fieldDescriptors: IndicesQuerySourceFields = {
index1: {
elser_query_fields: [],
dense_vector_query_fields: [{ field: 'field1', model_id: 'model1', nested: false }],
dense_vector_query_fields: [
{ field: 'field1', model_id: 'model1', nested: false, indices: ['index1'] },
],
bm25_query_fields: ['content', 'title'],
source_fields: [],
},
index2: {
elser_query_fields: [{ field: 'field2', model_id: 'model1', nested: false }],
elser_query_fields: [
{ field: 'field2', model_id: 'model1', nested: false, indices: ['index2'] },
],
dense_vector_query_fields: [],
bm25_query_fields: [],
source_fields: [],
@ -244,19 +402,23 @@ describe('create_query', () => {
};
expect(createQuery(fields, fieldDescriptors)).toEqual({
knn: [
{
field: 'field1',
k: 10,
num_candidates: 100,
query_vector_builder: {
text_embedding: {
model_id: 'model1',
model_text: '{query}',
retriever: {
standard: {
query: {
knn: {
field: 'field1',
num_candidates: 100,
filter: { terms: { _index: ['index1'] } },
query_vector_builder: {
text_embedding: {
model_id: 'model1',
model_text: '{query}',
},
},
},
},
},
],
},
});
});
@ -268,39 +430,47 @@ describe('create_query', () => {
const fieldDescriptors: IndicesQuerySourceFields = {
index1: {
elser_query_fields: [],
dense_vector_query_fields: [{ field: 'field1', model_id: 'model1', nested: false }],
dense_vector_query_fields: [
{ field: 'field1', model_id: 'model1', nested: false, indices: ['index1'] },
],
bm25_query_fields: ['content', 'title'],
source_fields: [],
},
};
expect(createQuery(fields, fieldDescriptors)).toEqual({
query: {
bool: {
should: [
retriever: {
rrf: {
retrievers: [
{
multi_match: {
query: '{query}',
fields: ['title', 'content'],
standard: {
query: {
multi_match: {
query: '{query}',
fields: ['title', 'content'],
},
},
},
},
{
standard: {
query: {
knn: {
field: 'field1',
num_candidates: 100,
query_vector_builder: {
text_embedding: {
model_id: 'model1',
model_text: '{query}',
},
},
},
},
},
},
],
minimum_should_match: 1,
},
},
knn: [
{
field: 'field1',
k: 10,
num_candidates: 100,
query_vector_builder: {
text_embedding: {
model_id: 'model1',
model_text: '{query}',
},
},
},
],
});
});
});
@ -309,8 +479,12 @@ describe('create_query', () => {
it('should return default ELSER query fields', () => {
const fieldDescriptors: IndicesQuerySourceFields = {
index1: {
elser_query_fields: [{ field: 'field1', model_id: 'model1', nested: false }],
dense_vector_query_fields: [{ field: 'field1', model_id: 'dense_model', nested: false }],
elser_query_fields: [
{ field: 'field1', model_id: 'model1', nested: false, indices: ['index1'] },
],
dense_vector_query_fields: [
{ field: 'field1', model_id: 'dense_model', nested: false, indices: ['index1'] },
],
bm25_query_fields: [],
source_fields: [],
},
@ -322,17 +496,31 @@ describe('create_query', () => {
it('should return default elser query fields for multiple indices', () => {
const fieldDescriptors: IndicesQuerySourceFields = {
index1: {
elser_query_fields: [{ field: 'field1', model_id: 'model1', nested: false }],
elser_query_fields: [
{ field: 'field1', model_id: 'model1', nested: false, indices: ['index1'] },
],
dense_vector_query_fields: [
{ field: 'dv_field1', model_id: 'dense_model', nested: false },
{
field: 'dv_field1',
model_id: 'dense_model',
nested: false,
indices: ['index1', 'index2'],
},
],
bm25_query_fields: [],
source_fields: [],
},
index2: {
elser_query_fields: [{ field: 'vector', model_id: 'model1', nested: false }],
elser_query_fields: [
{ field: 'vector', model_id: 'model1', nested: false, indices: ['index2'] },
],
dense_vector_query_fields: [
{ field: 'dv_field1', model_id: 'dense_model', nested: false },
{
field: 'dv_field1',
model_id: 'dense_model',
nested: false,
indices: ['index1', 'index2'],
},
],
bm25_query_fields: [],
source_fields: [],
@ -348,17 +536,31 @@ describe('create_query', () => {
it('should return elser query fields for default fields', () => {
const fieldDescriptors: IndicesQuerySourceFields = {
index1: {
elser_query_fields: [{ field: 'field1', model_id: 'model1', nested: false }],
elser_query_fields: [
{ field: 'field1', model_id: 'model1', nested: false, indices: ['index1'] },
],
dense_vector_query_fields: [
{ field: 'dv_field1', model_id: 'dense_model', nested: false },
{
field: 'dv_field1',
model_id: 'dense_model',
nested: false,
indices: ['index1', 'index2'],
},
],
bm25_query_fields: [],
source_fields: [],
},
index2: {
elser_query_fields: [{ field: 'vector', model_id: 'model1', nested: false }],
elser_query_fields: [
{ field: 'vector', model_id: 'model1', nested: false, indices: ['index2'] },
],
dense_vector_query_fields: [
{ field: 'dv_field1', model_id: 'dense_model', nested: false },
{
field: 'dv_field1',
model_id: 'dense_model',
nested: false,
indices: ['index1', 'index2'],
},
],
bm25_query_fields: [],
source_fields: [],
@ -376,7 +578,7 @@ describe('create_query', () => {
index1: {
elser_query_fields: [],
dense_vector_query_fields: [
{ field: 'dv_field1', model_id: 'dense_model', nested: false },
{ field: 'dv_field1', model_id: 'dense_model', nested: false, indices: ['index1'] },
],
bm25_query_fields: [],
source_fields: [],

View file

@ -16,7 +16,14 @@ const SUGGESTED_SPARSE_FIELDS = [
'vector.tokens', // LangChain field
];
const SUGGESTED_BM25_FIELDS = ['title', 'body_content', 'page_content_text', 'text', 'content'];
// Text field names suggested for BM25 matching.
// NOTE(review): how this list is consumed is outside the visible hunk — confirm
// against getDefaultQueryFields before reordering entries.
const SUGGESTED_BM25_FIELDS = [
  'title',
  'body_content',
  'page_content_text',
  'text',
  'content',
  // plain string literal (was a template literal with no interpolation)
  'text_field',
];
const SUGGESTED_DENSE_VECTOR_FIELDS = ['content_vector.tokens'];
@ -27,7 +34,18 @@ interface Matches {
knnMatches: any[];
}
export function createQuery(fields: IndexFields, fieldDescriptors: IndicesQuerySourceFields) {
/**
 * Options controlling how createQuery combines multiple matches into one retriever.
 */
interface ReRankOptions {
/**
 * When true, each match is wrapped in its own `standard` retriever and the
 * set is combined under an `rrf` retriever. When false, all matches become
 * `should` clauses of a single bool query (minimum_should_match: 1) inside
 * one `standard` retriever. Only consulted when there are two or more
 * matches; the single-match and no-match cases return before it is read.
 */
rrf: boolean;
}
export function createQuery(
fields: IndexFields,
fieldDescriptors: IndicesQuerySourceFields,
rerankOptions: ReRankOptions = {
rrf: true,
}
) {
const indices = Object.keys(fieldDescriptors);
const boolMatches = Object.keys(fields).reduce<Matches>(
(acc, index) => {
const indexFields: string[] = fields[index];
@ -86,14 +104,22 @@ export function createQuery(fields: IndexFields, fieldDescriptors: IndicesQueryS
// not supporting nested fields for now
if (denseVectorField && !denseVectorField.nested) {
// when the knn field isn't found in all indices, we need a filter to ensure we only use the field from the correct index
const filter =
denseVectorField.indices.length < indices.length
? { filter: { terms: { _index: denseVectorField.indices } } }
: {};
return {
field: denseVectorField.field,
k: 10,
num_candidates: 100,
query_vector_builder: {
text_embedding: {
model_id: denseVectorField.model_id,
model_text: '{query}',
knn: {
field: denseVectorField.field,
num_candidates: 100,
...filter,
query_vector_builder: {
text_embedding: {
model_id: denseVectorField.model_id,
model_text: '{query}',
},
},
},
};
@ -115,18 +141,73 @@ export function createQuery(fields: IndexFields, fieldDescriptors: IndicesQueryS
}
);
return {
...(boolMatches.queryMatches.length > 0
? {
// for single Elser support to make it easy to read - skips bool query
if (boolMatches.queryMatches.length === 1 && boolMatches.knnMatches.length === 0) {
return {
retriever: {
standard: {
query: boolMatches.queryMatches[0],
},
},
};
}
// for single Dense vector support to make it easy to read - skips bool query
if (boolMatches.queryMatches.length === 0 && boolMatches.knnMatches.length === 1) {
return {
retriever: {
standard: {
query: boolMatches.knnMatches[0],
},
},
};
}
const matches = [...boolMatches.queryMatches, ...boolMatches.knnMatches];
if (matches.length === 0) {
return {
retriever: {
standard: {
query: {
bool: {
should: boolMatches.queryMatches,
minimum_should_match: 1,
},
match_all: {},
},
}
: {}),
...(boolMatches.knnMatches.length > 0 ? { knn: boolMatches.knnMatches } : {}),
},
},
};
}
// determine if we need to use a rrf query
if (rerankOptions.rrf) {
const retrievers = matches.map((clause) => {
return {
standard: {
query: clause,
},
};
});
return {
retriever: {
rrf: {
retrievers,
},
},
};
}
// No RRF - add all the matches (DENSE + BM25 + SPARSE) to the bool query
return {
retriever: {
standard: {
query: {
bool: {
should: matches,
minimum_should_match: 1,
},
},
},
},
};
}

View file

@ -44,7 +44,9 @@ describe('conversational chain', () => {
});
const mockElasticsearchClient = {
search: searchMock,
transport: {
request: searchMock,
},
};
const llm = new FakeListLLM({
@ -126,9 +128,9 @@ describe('conversational chain', () => {
],
[
{
index: 'index,website',
query: { match: { field: 'what is the work from home policy?' } },
size: 3,
method: 'POST',
path: '/index,website/_search',
body: { query: { match: { field: 'what is the work from home policy?' } }, size: 3 },
},
]
);
@ -166,9 +168,9 @@ describe('conversational chain', () => {
],
[
{
index: 'index,website',
query: { match: { field: 'rewrite the question' } },
size: 3,
method: 'POST',
path: '/index,website/_search',
body: { query: { match: { field: 'rewrite the question' } }, size: 3 },
},
]
);
@ -206,9 +208,9 @@ describe('conversational chain', () => {
],
[
{
index: 'index,website',
query: { match: { field: 'rewrite "the" question' } },
size: 3,
method: 'POST',
path: '/index,website/_search',
body: { query: { match: { field: 'rewrite "the" question' } }, size: 3 },
},
]
);

View file

@ -8,7 +8,11 @@
import { BaseRetriever, type BaseRetrieverInput } from '@langchain/core/retrievers';
import { Document } from '@langchain/core/documents';
import { Client } from '@elastic/elasticsearch';
import { SearchHit } from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import {
AggregationsAggregate,
SearchHit,
SearchResponse,
} from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
export interface ElasticsearchRetrieverInput extends BaseRetrieverInput {
/**
@ -64,11 +68,15 @@ export class ElasticsearchRetriever extends BaseRetriever {
async _getRelevantDocuments(query: string): Promise<Document[]> {
try {
const queryBody = this.query_body_fn(query);
const results = await this.client.search({
...queryBody,
index: this.index,
size: this.k,
});
const results = (await this.client.transport.request({
method: 'POST',
path: `/${this.index}/_search`,
body: {
...queryBody,
size: this.k,
},
})) as SearchResponse<unknown, Record<string, AggregationsAggregate>>;
const hits = results.hits.hits;

View file

@ -33,40 +33,41 @@ describe('fetch_query_source_fields', () => {
workplace_index: {
bm25_query_fields: [
'metadata.summary',
'vector.model_id',
'metadata.rolePermissions',
'text',
'metadata.name',
],
dense_vector_query_fields: [],
elser_query_fields: [
{ field: 'vector.tokens', model_id: '.elser_model_2', nested: false },
],
source_fields: [
'metadata.summary',
'vector.model_id',
'metadata.rolePermissions',
'text',
'metadata.name',
{
field: 'vector.tokens',
model_id: '.elser_model_2',
nested: false,
indices: ['workplace_index'],
},
],
source_fields: ['metadata.summary', 'metadata.rolePermissions', 'text', 'metadata.name'],
},
workplace_index2: {
bm25_query_fields: [
'metadata.summary',
'content',
'metadata.rolePermissions',
'content_vector.model_id',
'metadata.name',
],
dense_vector_query_fields: [],
elser_query_fields: [
{ field: 'content_vector.tokens', model_id: '.elser_model_2', nested: false },
{
field: 'content_vector.tokens',
model_id: '.elser_model_2',
nested: false,
indices: ['workplace_index2'],
},
],
source_fields: [
'metadata.summary',
'content',
'metadata.rolePermissions',
'content_vector.model_id',
'metadata.name',
],
},
@ -85,7 +86,6 @@ describe('fetch_query_source_fields', () => {
'search-example-main': {
bm25_query_fields: [
'page_content_key',
'page_content_e5_embbeding.model_id',
'title',
'main_button.button_title',
'page_notification',
@ -104,12 +104,12 @@ describe('fetch_query_source_fields', () => {
field: 'page_content_e5_embbeding.predicted_value',
model_id: '.multilingual-e5-small_linux-x86_64',
nested: false,
indices: ['search-example-main'],
},
],
elser_query_fields: [],
source_fields: [
'page_content_key',
'page_content_e5_embbeding.model_id',
'title',
'main_button.button_title',
'page_notification',
@ -145,7 +145,6 @@ describe('fetch_query_source_fields', () => {
'metadata.name',
'passages.text',
'metadata.summary',
'passages.vector.model_id',
'metadata.content',
],
dense_vector_query_fields: [
@ -153,6 +152,7 @@ describe('fetch_query_source_fields', () => {
field: 'passages.vector.predicted_value',
model_id: '.multilingual-e5-small',
nested: true,
indices: ['workplace_index_nested'],
},
],
elser_query_fields: [],
@ -164,7 +164,6 @@ describe('fetch_query_source_fields', () => {
'metadata.name',
'passages.text',
'metadata.summary',
'passages.vector.model_id',
'metadata.content',
],
},

View file

@ -87,6 +87,11 @@ export const parseFieldsCapabilities = (
return acc;
}, {});
// Metadata fields (those ending in '.model_id') are left out of the BM25 and
// source field lists.
// NOTE(review): despite its name, this returns true when the field should be
// KEPT (it does not end with '.model_id') and false when it should be ignored.
// The text-field branch below relies on that polarity, so rename the helper
// rather than inverting its result if this is cleaned up.
const shouldIgnoreField = (field: string) => {
return !field.endsWith('.model_id');
};
const querySourceFields = Object.keys(fields).reduce<IndicesQuerySourceFields>(
(acc: IndicesQuerySourceFields, fieldKey) => {
const field = fields[fieldKey];
@ -105,6 +110,7 @@ export const parseFieldsCapabilities = (
field: fieldKey,
model_id: getModelField(fieldKey, indexDoc, nestedField),
nested: !!isFieldNested(fieldKey, fieldCapsResponse),
indices: indicesPresentIn,
};
acc[index].elser_query_fields.push(elserModelField);
} else if ('dense_vector' in field) {
@ -113,9 +119,10 @@ export const parseFieldsCapabilities = (
field: fieldKey,
model_id: getModelField(fieldKey, indexDoc, nestedField),
nested: !!nestedField,
indices: indicesPresentIn,
};
acc[index].dense_vector_query_fields.push(denseVectorField);
} else if ('text' in field && field.text.searchable) {
} else if ('text' in field && field.text.searchable && shouldIgnoreField(fieldKey)) {
acc[index].bm25_query_fields.push(fieldKey);
acc[index].source_fields.push(fieldKey);
}