[ML] AIOps: Fix text field candidate selection for log rate analysis. (#179699)

This commit is contained in:
Walter Rafelsberger 2024-03-29 20:43:36 +01:00 committed by GitHub
parent 2b991ecb6d
commit a8e2581f65
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 757 additions and 16 deletions

View file

@ -0,0 +1,253 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
/**
 * Field capabilities fixture for the `ft_ecommerce` index.
 *
 * `fields` maps each field name to a single-key object (keyed by the type
 * name) holding the `type`, `metadata_field`, `searchable` and `aggregatable`
 * flags. The entries are assembled with small local builders so the
 * per-type flag combinations are written down only once.
 */
export const fieldCapsEcommerceMock = (() => {
  /** Builds one `{ [type]: { ...flags } }` capability entry. */
  const entry = (
    type: string,
    metadataField: boolean,
    searchable: boolean,
    aggregatable: boolean
  ) => ({
    [type]: { type, metadata_field: metadataField, searchable, aggregatable },
  });

  /** Entry with `metadata_field: false`; searchable and aggregatable unless stated otherwise. */
  const mapped = (type: string, searchable = true, aggregatable = true) =>
    entry(type, false, searchable, aggregatable);

  /** Entry with `metadata_field: true`. */
  const meta = (type: string, searchable: boolean, aggregatable: boolean) =>
    entry(type, true, searchable, aggregatable);

  const keyword = () => mapped('keyword');
  const halfFloat = () => mapped('half_float');
  const integer = () => mapped('integer');
  // Text entries are searchable but not aggregatable.
  const text = () => mapped('text', true, false);
  // Object entries are neither searchable nor aggregatable.
  const objectNode = () => mapped('object', false, false);

  return {
    indices: ['ft_ecommerce'],
    fields: {
      'products.manufacturer': text(),
      'products.discount_amount': halfFloat(),
      'products.base_unit_price': halfFloat(),
      type: keyword(),
      'products.discount_percentage': halfFloat(),
      'products._id.keyword': keyword(),
      day_of_week_i: integer(),
      total_quantity: integer(),
      total_unique_products: integer(),
      taxless_total_price: halfFloat(),
      'geoip.continent_name': keyword(),
      sku: keyword(),
      _version: meta('_version', false, true),
      'customer_full_name.keyword': keyword(),
      'category.keyword': keyword(),
      'products.taxless_price': halfFloat(),
      'products.quantity': integer(),
      'products.price': halfFloat(),
      customer_first_name: text(),
      customer_phone: keyword(),
      'geoip.region_name': keyword(),
      _tier: meta('keyword', true, true),
      _seq_no: meta('_seq_no', true, true),
      customer_full_name: text(),
      'geoip.country_iso_code': keyword(),
      _source: meta('_source', false, false),
      _id: meta('_id', true, false),
      order_id: keyword(),
      'products._id': text(),
      'products.product_name.keyword': keyword(),
      _index: meta('_index', true, true),
      'products.product_id': mapped('long'),
      'products.category': text(),
      'products.manufacturer.keyword': keyword(),
      manufacturer: text(),
      products: objectNode(),
      'products.unit_discount_amount': halfFloat(),
      customer_last_name: text(),
      'geoip.location': mapped('geo_point'),
      'products.tax_amount': halfFloat(),
      'products.product_name': text(),
      'products.min_price': halfFloat(),
      'manufacturer.keyword': keyword(),
      'products.taxful_price': halfFloat(),
      currency: keyword(),
      'products.base_price': halfFloat(),
      email: keyword(),
      day_of_week: keyword(),
      'products.sku': keyword(),
      'customer_last_name.keyword': keyword(),
      geoip: objectNode(),
      'products.category.keyword': keyword(),
      'geoip.city_name': keyword(),
      order_date: mapped('date'),
      'customer_first_name.keyword': keyword(),
      'products.created_on': mapped('date'),
      category: text(),
      customer_id: keyword(),
      user: keyword(),
      customer_gender: keyword(),
      taxful_total_price: halfFloat(),
    },
  };
})();

View file

@ -0,0 +1,34 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
/**
 * Field capabilities fixture for the `large_arrays` index.
 *
 * Apart from the entries flagged `metadata_field: true`, the index only has
 * a `@timestamp` date field and an `items` keyword field.
 */
export const fieldCapsLargeArraysMock = (() => {
  /** Builds one `{ [type]: { ...flags } }` capability entry. */
  const entry = (
    type: string,
    metadataField: boolean,
    searchable: boolean,
    aggregatable: boolean
  ) => ({
    [type]: { type, metadata_field: metadataField, searchable, aggregatable },
  });

  /** Entry with `metadata_field: true`. */
  const meta = (type: string, searchable: boolean, aggregatable: boolean) =>
    entry(type, true, searchable, aggregatable);

  /** Searchable, aggregatable entry with `metadata_field: false`. */
  const mapped = (type: string) => entry(type, false, true, true);

  return {
    indices: ['large_arrays'],
    fields: {
      _tier: meta('keyword', true, true),
      _seq_no: meta('_seq_no', true, true),
      '@timestamp': mapped('date'),
      _index: meta('_index', true, true),
      _source: meta('_source', false, false),
      _id: meta('_id', true, false),
      _version: meta('_version', false, true),
      items: mapped('keyword'),
    },
  };
})();

View file

@ -0,0 +1,286 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
/**
 * Field capabilities fixture for the `my-index` index (pgBench-style log
 * mappings).
 *
 * `fields` maps each field name to a single-key object (keyed by the type
 * name) holding the `type`, `metadata_field`, `searchable` and `aggregatable`
 * flags. The fixture includes keyword fields with `.text` sub-fields
 * (e.g. `user.name` / `user.name.text`) as well as standalone text fields
 * (`message`, `error.message`).
 */
export const fieldCapsPgBenchMock = (() => {
  /** Builds one `{ [type]: { ...flags } }` capability entry. */
  const entry = (
    type: string,
    metadataField: boolean,
    searchable: boolean,
    aggregatable: boolean
  ) => ({
    [type]: { type, metadata_field: metadataField, searchable, aggregatable },
  });

  /** Entry with `metadata_field: false`; searchable and aggregatable unless stated otherwise. */
  const mapped = (type: string, searchable = true, aggregatable = true) =>
    entry(type, false, searchable, aggregatable);

  /** Entry with `metadata_field: true`. */
  const meta = (type: string, searchable: boolean, aggregatable: boolean) =>
    entry(type, true, searchable, aggregatable);

  const keyword = () => mapped('keyword');
  const long = () => mapped('long');
  const date = () => mapped('date');
  const ip = () => mapped('ip');
  // Text entries are searchable but not aggregatable.
  const text = () => mapped('text', true, false);
  // Object entries are neither searchable nor aggregatable.
  const objectNode = () => mapped('object', false, false);

  return {
    indices: ['my-index'],
    fields: {
      stack: keyword(),
      metadata: mapped('flattened'),
      'kubernetes.namespace_uid': keyword(),
      'host.hostname': keyword(),
      'kubernetes.node.labels.kubernetes_io/os': keyword(),
      hostname: keyword(),
      _metadata: objectNode(),
      _version: meta('_version', false, true),
      'req.headers.x-real-ip': keyword(),
      amount_f: mapped('float'),
      'kubernetes.node.labels.addon_gke_io/node-local-dns-ds-ready': keyword(),
      'container.labels': objectNode(),
      'kubernetes.pod.ip': ip(),
      '_metadata.user_id': keyword(),
      'kubernetes.container.name': keyword(),
      'postgresql.log.database': keyword(),
      'container.labels.annotation_io_kubernetes_container_restartCount': keyword(),
      fileset: objectNode(),
      'kubernetes.node.labels.beta_kubernetes_io/arch': keyword(),
      'host.os.platform': keyword(),
      _field_names: meta('_field_names', true, false),
      'cloud.account.id': keyword(),
      v: long(),
      'error.message': text(),
      elasticapm_transaction_id: keyword(),
      'log.file.path': keyword(),
      'kubernetes.node.labels.kubernetes_io/arch': keyword(),
      'container.labels.io_kubernetes_container_name': keyword(),
      'user.name': keyword(),
      'user.name.text': text(),
      'cloud.instance.name': keyword(),
      'req.headers.accept-encoding': keyword(),
      kubernetes: objectNode(),
      agent: objectNode(),
      'cloud.instance': objectNode(),
      'container.labels.io_kubernetes_pod_namespace': keyword(),
      'kubernetes.labels.pod-template-hash': keyword(),
      'log.origin': objectNode(),
      'kubernetes.node.labels.cloud_google_com/machine-family': keyword(),
      elasticapm_span_id: keyword(),
      'host.os': objectNode(),
      'host.os.name': keyword(),
      'host.os.name.text': text(),
      'log.level': keyword(),
      details: keyword(),
      'postgresql.log.query': keyword(),
      'process.thread': objectNode(),
      'container.labels.annotation_io_kubernetes_pod_terminationGracePeriod': keyword(),
      req: objectNode(),
      'kubernetes.node.labels.cloud_google_com/gke-boot-disk': keyword(),
      '_metadata.elastic_apm_trace_id': keyword(),
      'log.file': objectNode(),
      'log.offset': long(),
      'client.ip': ip(),
      'process.name': keyword(),
      'process.name.text': text(),
      name: keyword(),
      'agent.version': keyword(),
      'host.os.family': keyword(),
      'req.headers.origin': keyword(),
      'kubernetes.node.labels.node_kubernetes_io/instance-type': keyword(),
      'req.headers.tracestate': keyword(),
      'postgresql.log.timestamp': keyword(),
      '_metadata.metadata_event_dataset': keyword(),
      related: objectNode(),
      'event.module': keyword(),
      'req.headers': objectNode(),
      'host.os.kernel': keyword(),
      'kubernetes.node.labels.cloud_google_com/gke-container-runtime': keyword(),
      'kubernetes.pod.name': keyword(),
      client: objectNode(),
      'req.headers.cache-control': keyword(),
      'event.timezone': keyword(),
      'log.origin.file.line': long(),
      'service.name': keyword(),
      'kubernetes.namespace_labels.kubernetes_io/metadata_name': keyword(),
      message: text(),
      _source: meta('_source', false, false),
      log: objectNode(),
      event: objectNode(),
      'event.duration': long(),
      'event.ingested': date(),
      '@timestamp': date(),
      transaction: objectNode(),
      span: objectNode(),
      '_metadata.sum': long(),
      _tier: meta('keyword', true, true),
      _seq_no: meta('_seq_no', true, true),
      code: long(),
      _index: meta('_index', true, true),
      'client.geo.location': mapped('geo_point'),
    },
  };
})();

View file

@ -10,12 +10,15 @@ import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import type { ElasticsearchClient } from '@kbn/core/server';
import { paramsSearchQueryMock } from './__mocks__/params_search_query';
import { fieldCapsPgBenchMock } from './__mocks__/field_caps_pgbench';
import { fieldCapsEcommerceMock } from './__mocks__/field_caps_ecommerce';
import { fieldCapsLargeArraysMock } from './__mocks__/field_caps_large_arrays';
import { fetchIndexInfo } from './fetch_index_info';
describe('fetch_index_info', () => {
describe('fetchFieldCandidates', () => {
it('returns field candidates and total hits', async () => {
it('returns field candidates and total hits for "my" fields', async () => {
const esClientFieldCapsMock = jest.fn(() => ({
fields: {
// Should end up as a field candidate
@ -31,15 +34,7 @@ describe('fetch_index_info', () => {
const esClientSearchMock = jest.fn((req: estypes.SearchRequest): estypes.SearchResponse => {
return {
hits: {
hits: [
{
fields: {
myIpFieldName: '1.1.1.1',
myKeywordFieldName: 'myKeywordFieldValue',
myNumericFieldName: 1234,
},
},
],
hits: [],
total: { value: 5000000 },
},
} as unknown as estypes.SearchResponse;
@ -59,5 +54,175 @@ describe('fetch_index_info', () => {
expect(esClientFieldCapsMock).toHaveBeenCalledTimes(1);
expect(esClientSearchMock).toHaveBeenCalledTimes(2);
});
it('returns field candidates and total hits for pgBench mappings', async () => {
  // Field caps are served straight from the pgBench fixture.
  const fieldCapsSpy = jest.fn(() => fieldCapsPgBenchMock);
  // Every search resolves to no hits and the same total doc count.
  const searchSpy = jest.fn(
    (req: estypes.SearchRequest): estypes.SearchResponse =>
      ({
        hits: {
          hits: [],
          total: { value: 5000000 },
        },
      } as unknown as estypes.SearchResponse)
  );
  const stubbedClient = {
    fieldCaps: fieldCapsSpy,
    search: searchSpy,
  } as unknown as ElasticsearchClient;

  const indexInfo = await fetchIndexInfo(stubbedClient, paramsSearchQueryMock);

  // Keyword and ip fields, in sorted order.
  expect(indexInfo.fieldCandidates).toEqual([
    '_metadata.elastic_apm_trace_id',
    '_metadata.metadata_event_dataset',
    '_metadata.user_id',
    'agent.version',
    'client.ip',
    'cloud.account.id',
    'cloud.instance.name',
    'container.labels.annotation_io_kubernetes_container_restartCount',
    'container.labels.annotation_io_kubernetes_pod_terminationGracePeriod',
    'container.labels.io_kubernetes_container_name',
    'container.labels.io_kubernetes_pod_namespace',
    'details',
    'elasticapm_span_id',
    'elasticapm_transaction_id',
    'event.module',
    'event.timezone',
    'host.hostname',
    'host.os.family',
    'host.os.kernel',
    'host.os.name',
    'host.os.platform',
    'hostname',
    'kubernetes.container.name',
    'kubernetes.labels.pod-template-hash',
    'kubernetes.namespace_labels.kubernetes_io/metadata_name',
    'kubernetes.namespace_uid',
    'kubernetes.node.labels.addon_gke_io/node-local-dns-ds-ready',
    'kubernetes.node.labels.beta_kubernetes_io/arch',
    'kubernetes.node.labels.cloud_google_com/gke-boot-disk',
    'kubernetes.node.labels.cloud_google_com/gke-container-runtime',
    'kubernetes.node.labels.cloud_google_com/machine-family',
    'kubernetes.node.labels.kubernetes_io/arch',
    'kubernetes.node.labels.kubernetes_io/os',
    'kubernetes.node.labels.node_kubernetes_io/instance-type',
    'kubernetes.pod.ip',
    'kubernetes.pod.name',
    'log.file.path',
    'log.level',
    'name',
    'postgresql.log.database',
    'postgresql.log.query',
    'postgresql.log.timestamp',
    'process.name',
    'req.headers.accept-encoding',
    'req.headers.cache-control',
    'req.headers.origin',
    'req.headers.tracestate',
    'req.headers.x-real-ip',
    'service.name',
    'stack',
    'user.name',
  ]);
  // The `*.text` sub-fields of keyword candidates (e.g. `user.name.text`) must not show up here.
  expect(indexInfo.textFieldCandidates).toEqual(['error.message', 'message']);
  expect(indexInfo.baselineTotalDocCount).toEqual(5000000);
  expect(indexInfo.deviationTotalDocCount).toEqual(5000000);
  expect(fieldCapsSpy).toHaveBeenCalledTimes(1);
  expect(searchSpy).toHaveBeenCalledTimes(2);
});
it('returns field candidates and total hits for ecommerce mappings', async () => {
  // Field caps are served straight from the ecommerce fixture.
  const fieldCapsSpy = jest.fn(() => fieldCapsEcommerceMock);
  // Every search resolves to no hits and the same total doc count.
  const searchSpy = jest.fn(
    (req: estypes.SearchRequest): estypes.SearchResponse =>
      ({
        hits: {
          hits: [],
          total: { value: 5000000 },
        },
      } as unknown as estypes.SearchResponse)
  );
  const stubbedClient = {
    fieldCaps: fieldCapsSpy,
    search: searchSpy,
  } as unknown as ElasticsearchClient;

  const indexInfo = await fetchIndexInfo(stubbedClient, paramsSearchQueryMock);

  expect(indexInfo.fieldCandidates).toEqual([
    'category.keyword',
    'currency',
    'customer_first_name.keyword',
    'customer_full_name.keyword',
    'customer_gender',
    'customer_id',
    'customer_last_name.keyword',
    'customer_phone',
    'day_of_week',
    'email',
    'geoip.city_name',
    'geoip.continent_name',
    'geoip.country_iso_code',
    'geoip.region_name',
    'manufacturer.keyword',
    'order_id',
    'products._id.keyword',
    'products.category.keyword',
    'products.manufacturer.keyword',
    'products.product_name.keyword',
    'products.sku',
    'sku',
    'type',
    'user',
  ]);
  // Every text field in this fixture has a `.keyword` sibling, so none is expected as a text candidate.
  expect(indexInfo.textFieldCandidates).toEqual([]);
  expect(indexInfo.baselineTotalDocCount).toEqual(5000000);
  expect(indexInfo.deviationTotalDocCount).toEqual(5000000);
  expect(fieldCapsSpy).toHaveBeenCalledTimes(1);
  expect(searchSpy).toHaveBeenCalledTimes(2);
});
it('returns field candidates and total hits for large-arrays mappings', async () => {
  // Field caps are served straight from the large-arrays fixture.
  const fieldCapsSpy = jest.fn(() => fieldCapsLargeArraysMock);
  // Every search resolves to no hits and the same total doc count.
  const searchSpy = jest.fn(
    (req: estypes.SearchRequest): estypes.SearchResponse =>
      ({
        hits: {
          hits: [],
          total: { value: 5000000 },
        },
      } as unknown as estypes.SearchResponse)
  );
  const stubbedClient = {
    fieldCaps: fieldCapsSpy,
    search: searchSpy,
  } as unknown as ElasticsearchClient;

  const indexInfo = await fetchIndexInfo(stubbedClient, paramsSearchQueryMock);

  // `items` is the only keyword field in the fixture that is not flagged as a metadata field.
  expect(indexInfo.fieldCandidates).toEqual(['items']);
  expect(indexInfo.textFieldCandidates).toEqual([]);
  expect(indexInfo.baselineTotalDocCount).toEqual(5000000);
  expect(indexInfo.deviationTotalDocCount).toEqual(5000000);
  expect(fieldCapsSpy).toHaveBeenCalledTimes(1);
  expect(searchSpy).toHaveBeenCalledTimes(2);
});
});
});

View file

@ -101,10 +101,13 @@ export const fetchIndexInfo = async (
const fieldCandidates: string[] = [...acceptableFields].filter(
(field) => !textFieldCandidatesOverridesWithKeywordPostfix.includes(field)
);
const textFieldCandidates: string[] = [...acceptableTextFields].filter(
(field) =>
!allFieldNames.includes(`${field}.keyword`) || textFieldCandidatesOverrides.includes(field)
);
const textFieldCandidates: string[] = [...acceptableTextFields].filter((field) => {
const fieldName = field.replace(new RegExp(/\.text$/), '');
return (
(!fieldCandidates.includes(fieldName) && !fieldCandidates.includes(`${fieldName}.keyword`)) ||
textFieldCandidatesOverrides.includes(field)
);
});
const baselineTotalDocCount = (respBaselineTotalDocCount.hits.total as estypes.SearchTotalHits)
.value;
@ -112,8 +115,8 @@ export const fetchIndexInfo = async (
.value;
return {
fieldCandidates,
textFieldCandidates,
fieldCandidates: fieldCandidates.sort(),
textFieldCandidates: textFieldCandidates.sort(),
baselineTotalDocCount,
deviationTotalDocCount,
zeroDocsFallback: baselineTotalDocCount === 0 || deviationTotalDocCount === 0,