mirror of
https://github.com/elastic/kibana.git
synced 2025-04-24 09:48:58 -04:00
[ML] Transforms: Fix handling of fields with keyword mapping available (#98882)
- For groupby/agg configs, removes the .keyword postfix for the agg name and field name being displayed. The config itself will still use the field name including .keyword. - For histogram charts, if available, query data using the .keyword field. This enables support for charts for terms when there's both a text and keyword variant. - Fixes isKeywordDuplicate check for field names with multiple dots in them.
This commit is contained in:
parent
bc352c033c
commit
dfcb1794d4
11 changed files with 203 additions and 34 deletions
59
x-pack/plugins/transform/common/utils/field_utils.test.ts
Normal file
59
x-pack/plugins/transform/common/utils/field_utils.test.ts
Normal file
|
@ -0,0 +1,59 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import { hasKeywordDuplicate, isKeywordDuplicate, removeKeywordPostfix } from './field_utils';
|
||||
|
||||
const allFields = new Set([
|
||||
'field1',
|
||||
'field2',
|
||||
'field2.keyword',
|
||||
'field3.keyword',
|
||||
'field3.keyword.keyword',
|
||||
'field4.keyword.b',
|
||||
'field4.keyword.b.keyword',
|
||||
]);
|
||||
|
||||
describe('field_utils: hasKeywordDuplicate()', () => {
|
||||
it('returns true when a corresponding keyword field is available', () => {
|
||||
expect(hasKeywordDuplicate('field2', allFields)).toBe(true);
|
||||
expect(hasKeywordDuplicate('field3.keyword', allFields)).toBe(true);
|
||||
expect(hasKeywordDuplicate('field4.keyword.b', allFields)).toBe(true);
|
||||
});
|
||||
it('returns false when a corresponding keyword field is not available', () => {
|
||||
expect(hasKeywordDuplicate('field1', allFields)).toBe(false);
|
||||
expect(hasKeywordDuplicate('field2.keyword', allFields)).toBe(false);
|
||||
expect(hasKeywordDuplicate('field3.keyword.keyword', allFields)).toBe(false);
|
||||
expect(hasKeywordDuplicate('field4.keyword.b.keyword', allFields)).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe('field_utils: isKeywordDuplicate()', () => {
|
||||
it('returns true when a corresponding field without keyword postfix is available', () => {
|
||||
expect(isKeywordDuplicate('field2.keyword', allFields)).toBe(true);
|
||||
expect(isKeywordDuplicate('field3.keyword.keyword', allFields)).toBe(true);
|
||||
expect(isKeywordDuplicate('field4.keyword.b.keyword', allFields)).toBe(true);
|
||||
});
|
||||
it('returns false when a corresponding field without keyword postfix is not available', () => {
|
||||
expect(isKeywordDuplicate('field1', allFields)).toBe(false);
|
||||
expect(isKeywordDuplicate('field2', allFields)).toBe(false);
|
||||
expect(isKeywordDuplicate('field3.keyword', allFields)).toBe(false);
|
||||
expect(isKeywordDuplicate('field4.keyword.b', allFields)).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe('field_utils: removeKeywordPostfix()', () => {
|
||||
it('removes the keyword postfix', () => {
|
||||
expect(removeKeywordPostfix('field2.keyword')).toBe('field2');
|
||||
expect(removeKeywordPostfix('field3.keyword.keyword')).toBe('field3.keyword');
|
||||
expect(removeKeywordPostfix('field4.keyword.b.keyword')).toBe('field4.keyword.b');
|
||||
});
|
||||
it("returns the field name as is when there's no keyword postfix", () => {
|
||||
expect(removeKeywordPostfix('field1')).toBe('field1');
|
||||
expect(removeKeywordPostfix('field2')).toBe('field2');
|
||||
expect(removeKeywordPostfix('field4.keyword.b')).toBe('field4.keyword.b');
|
||||
});
|
||||
});
|
20
x-pack/plugins/transform/common/utils/field_utils.ts
Normal file
20
x-pack/plugins/transform/common/utils/field_utils.ts
Normal file
|
@ -0,0 +1,20 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
export const KEYWORD_POSTFIX = '.keyword';
|
||||
|
||||
// checks if fieldName has a `fieldName.keyword` equivalent in the set of all field names.
|
||||
export const hasKeywordDuplicate = (fieldName: string, fieldNamesSet: Set<string>): boolean =>
|
||||
fieldNamesSet.has(`${fieldName}${KEYWORD_POSTFIX}`);
|
||||
|
||||
// checks if a fieldName ends with `.keyword` and has a field name equivalent without the postfix in the set of all field names.
|
||||
export const isKeywordDuplicate = (fieldName: string, fieldNamesSet: Set<string>): boolean =>
|
||||
fieldName.endsWith(KEYWORD_POSTFIX) && fieldNamesSet.has(removeKeywordPostfix(fieldName));
|
||||
|
||||
// removes the `.keyword` postfix form a field name if applicable
|
||||
export const removeKeywordPostfix = (fieldName: string): string =>
|
||||
fieldName.replace(new RegExp(`${KEYWORD_POSTFIX}$`), '');
|
|
@ -14,6 +14,11 @@ import {
|
|||
isEsSearchResponse,
|
||||
isFieldHistogramsResponseSchema,
|
||||
} from '../../../common/api_schemas/type_guards';
|
||||
import {
|
||||
hasKeywordDuplicate,
|
||||
isKeywordDuplicate,
|
||||
removeKeywordPostfix,
|
||||
} from '../../../common/utils/field_utils';
|
||||
import type { EsSorting, UseIndexDataReturnType } from '../../shared_imports';
|
||||
|
||||
import { getErrorMessage } from '../../../common/utils/errors';
|
||||
|
@ -209,14 +214,25 @@ export const useIndexData = (
|
|||
};
|
||||
|
||||
const fetchColumnChartsData = async function () {
|
||||
const allIndexPatternFieldNames = new Set(indexPattern.fields.map((f) => f.name));
|
||||
const columnChartsData = await api.getHistogramsForFields(
|
||||
indexPattern.title,
|
||||
columns
|
||||
.filter((cT) => dataGrid.visibleColumns.includes(cT.id))
|
||||
.map((cT) => ({
|
||||
fieldName: cT.id,
|
||||
type: getFieldType(cT.schema),
|
||||
})),
|
||||
.map((cT) => {
|
||||
// If a column field name has a corresponding keyword field,
|
||||
// fetch the keyword field instead to be able to do aggregations.
|
||||
const fieldName = cT.id;
|
||||
return hasKeywordDuplicate(fieldName, allIndexPatternFieldNames)
|
||||
? {
|
||||
fieldName: `${fieldName}.keyword`,
|
||||
type: getFieldType(undefined),
|
||||
}
|
||||
: {
|
||||
fieldName,
|
||||
type: getFieldType(cT.schema),
|
||||
};
|
||||
}),
|
||||
isDefaultQuery(query) ? matchAllQuery : query,
|
||||
combinedRuntimeMappings
|
||||
);
|
||||
|
@ -226,7 +242,15 @@ export const useIndexData = (
|
|||
return;
|
||||
}
|
||||
|
||||
setColumnCharts(columnChartsData);
|
||||
setColumnCharts(
|
||||
// revert field names with `.keyword` used to do aggregations to their original column name
|
||||
columnChartsData.map((d) => ({
|
||||
...d,
|
||||
...(isKeywordDuplicate(d.id, allIndexPatternFieldNames)
|
||||
? { id: removeKeywordPostfix(d.id) }
|
||||
: {}),
|
||||
}))
|
||||
);
|
||||
};
|
||||
|
||||
useEffect(() => {
|
||||
|
|
|
@ -13,6 +13,8 @@ import {
|
|||
} from '../../../../../../../../../../src/plugins/data/public';
|
||||
|
||||
import { getNestedProperty } from '../../../../../../../common/utils/object_utils';
|
||||
import { removeKeywordPostfix } from '../../../../../../../common/utils/field_utils';
|
||||
|
||||
import { isRuntimeMappings } from '../../../../../../../common/shared_imports';
|
||||
|
||||
import {
|
||||
|
@ -93,41 +95,44 @@ export function getPivotDropdownOptions(
|
|||
|
||||
const combinedFields = [...indexPatternFields, ...runtimeFields].sort(sortByLabel);
|
||||
combinedFields.forEach((field) => {
|
||||
const rawFieldName = field.name;
|
||||
const displayFieldName = removeKeywordPostfix(rawFieldName);
|
||||
|
||||
// Group by
|
||||
const availableGroupByAggs: [] = getNestedProperty(pivotGroupByFieldSupport, field.type);
|
||||
|
||||
if (availableGroupByAggs !== undefined) {
|
||||
availableGroupByAggs.forEach((groupByAgg) => {
|
||||
// Aggregation name for the group-by is the plain field name. Illegal characters will be removed.
|
||||
const aggName = field.name.replace(illegalEsAggNameChars, '').trim();
|
||||
const aggName = displayFieldName.replace(illegalEsAggNameChars, '').trim();
|
||||
// Option name in the dropdown for the group-by is in the form of `sum(fieldname)`.
|
||||
const dropDownName = `${groupByAgg}(${field.name})`;
|
||||
const dropDownName = `${groupByAgg}(${displayFieldName})`;
|
||||
const groupByOption: DropDownLabel = { label: dropDownName };
|
||||
groupByOptions.push(groupByOption);
|
||||
groupByOptionsData[dropDownName] = getDefaultGroupByConfig(
|
||||
aggName,
|
||||
dropDownName,
|
||||
field.name,
|
||||
rawFieldName,
|
||||
groupByAgg
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
// Aggregations
|
||||
const aggOption: DropDownOption = { label: field.name, options: [] };
|
||||
const aggOption: DropDownOption = { label: displayFieldName, options: [] };
|
||||
const availableAggs: [] = getNestedProperty(pivotAggsFieldSupport, field.type);
|
||||
|
||||
if (availableAggs !== undefined) {
|
||||
availableAggs.forEach((agg) => {
|
||||
// Aggregation name is formatted like `fieldname.sum`. Illegal characters will be removed.
|
||||
const aggName = `${field.name.replace(illegalEsAggNameChars, '').trim()}.${agg}`;
|
||||
const aggName = `${displayFieldName.replace(illegalEsAggNameChars, '').trim()}.${agg}`;
|
||||
// Option name in the dropdown for the aggregation is in the form of `sum(fieldname)`.
|
||||
const dropDownName = `${agg}(${field.name})`;
|
||||
const dropDownName = `${agg}(${displayFieldName})`;
|
||||
aggOption.options.push({ label: dropDownName });
|
||||
aggOptionsData[dropDownName] = getDefaultAggregationConfig(
|
||||
aggName,
|
||||
dropDownName,
|
||||
field.name,
|
||||
rawFieldName,
|
||||
agg
|
||||
);
|
||||
});
|
||||
|
|
|
@ -61,6 +61,7 @@ import { registerTransformsAuditMessagesRoutes } from './transforms_audit_messag
|
|||
import { registerTransformNodesRoutes } from './transforms_nodes';
|
||||
import { IIndexPattern } from '../../../../../../src/plugins/data/common/index_patterns';
|
||||
import { isLatestTransform } from '../../../common/types/transform';
|
||||
import { isKeywordDuplicate } from '../../../common/utils/field_utils';
|
||||
|
||||
enum TRANSFORM_ACTIONS {
|
||||
STOP = 'stop',
|
||||
|
@ -562,9 +563,7 @@ const previewTransformHandler: RequestHandler<
|
|||
).reduce((acc, [fieldName, fieldCaps]) => {
|
||||
const fieldDefinition = Object.values(fieldCaps)[0];
|
||||
const isMetaField = fieldDefinition.type.startsWith('_') || fieldName === '_doc_count';
|
||||
const isKeywordDuplicate =
|
||||
fieldName.endsWith('.keyword') && fieldNamesSet.has(fieldName.split('.keyword')[0]);
|
||||
if (isMetaField || isKeywordDuplicate) {
|
||||
if (isMetaField || isKeywordDuplicate(fieldName, fieldNamesSet)) {
|
||||
return acc;
|
||||
}
|
||||
acc[fieldName] = { ...fieldDefinition };
|
||||
|
|
|
@ -45,8 +45,8 @@ export default function ({ getService }: FtrProviderContext) {
|
|||
|
||||
const pivotGroupByEntries = [
|
||||
{
|
||||
identifier: 'terms(category.keyword)',
|
||||
label: 'category.keyword',
|
||||
identifier: 'terms(category)',
|
||||
label: 'category',
|
||||
},
|
||||
{
|
||||
identifier: 'date_histogram(order_date)',
|
||||
|
|
|
@ -32,7 +32,7 @@ function getTransformConfig(): TransformPivotConfig {
|
|||
aggregations: { 'products.base_price.avg': { avg: { field: 'products.base_price' } } },
|
||||
},
|
||||
description:
|
||||
'ecommerce batch transform with avg(products.base_price) grouped by terms(category.keyword)',
|
||||
'ecommerce batch transform with avg(products.base_price) grouped by terms(category)',
|
||||
frequency: '3s',
|
||||
settings: {
|
||||
max_page_search_size: 250,
|
||||
|
|
|
@ -17,6 +17,7 @@ import {
|
|||
} from './index';
|
||||
|
||||
export default function ({ getService }: FtrProviderContext) {
|
||||
const canvasElement = getService('canvasElement');
|
||||
const esArchiver = getService('esArchiver');
|
||||
const transform = getService('transform');
|
||||
|
||||
|
@ -40,8 +41,8 @@ export default function ({ getService }: FtrProviderContext) {
|
|||
source: 'ft_ecommerce',
|
||||
groupByEntries: [
|
||||
{
|
||||
identifier: 'terms(category.keyword)',
|
||||
label: 'category.keyword',
|
||||
identifier: 'terms(category)',
|
||||
label: 'category',
|
||||
} as GroupByEntry,
|
||||
{
|
||||
identifier: 'date_histogram(order_date)',
|
||||
|
@ -85,16 +86,16 @@ export default function ({ getService }: FtrProviderContext) {
|
|||
],
|
||||
transformId: `ec_1_${Date.now()}`,
|
||||
transformDescription:
|
||||
'ecommerce batch transform with groups terms(category.keyword) + date_histogram(order_date) 1m and aggregation avg(products.base_price)',
|
||||
'ecommerce batch transform with groups terms(category) + date_histogram(order_date) 1m and aggregation avg(products.base_price)',
|
||||
get destinationIndex(): string {
|
||||
return `user-${this.transformId}`;
|
||||
},
|
||||
discoverAdjustSuperDatePicker: true,
|
||||
expected: {
|
||||
pivotAdvancedEditorValueArr: ['{', ' "group_by": {', ' "category.keyword": {'],
|
||||
pivotAdvancedEditorValueArr: ['{', ' "group_by": {', ' "category": {'],
|
||||
pivotAdvancedEditorValue: {
|
||||
group_by: {
|
||||
'category.keyword': {
|
||||
category: {
|
||||
terms: {
|
||||
field: 'category.keyword',
|
||||
},
|
||||
|
@ -156,7 +157,15 @@ export default function ({ getService }: FtrProviderContext) {
|
|||
rows: 5,
|
||||
},
|
||||
histogramCharts: [
|
||||
{ chartAvailable: false, id: 'category', legend: 'Chart not supported.' },
|
||||
{
|
||||
chartAvailable: true,
|
||||
id: 'category',
|
||||
legend: '6 categories',
|
||||
colorStats: [
|
||||
{ color: '#000000', percentage: 45 },
|
||||
{ color: '#54B399', percentage: 55 },
|
||||
],
|
||||
},
|
||||
{
|
||||
chartAvailable: true,
|
||||
id: 'currency',
|
||||
|
@ -166,8 +175,24 @@ export default function ({ getService }: FtrProviderContext) {
|
|||
{ color: '#54B399', percentage: 90 },
|
||||
],
|
||||
},
|
||||
{ chartAvailable: false, id: 'customer_first_name', legend: 'Chart not supported.' },
|
||||
{ chartAvailable: false, id: 'customer_full_name', legend: 'Chart not supported.' },
|
||||
{
|
||||
chartAvailable: true,
|
||||
id: 'customer_first_name',
|
||||
legend: 'top 20 of 46 categories',
|
||||
colorStats: [
|
||||
{ color: '#000000', percentage: 60 },
|
||||
{ color: '#54B399', percentage: 35 },
|
||||
],
|
||||
},
|
||||
{
|
||||
chartAvailable: true,
|
||||
id: 'customer_full_name',
|
||||
legend: 'top 20 of 3321 categories',
|
||||
colorStats: [
|
||||
{ color: '#000000', percentage: 25 },
|
||||
{ color: '#54B399', percentage: 67 },
|
||||
],
|
||||
},
|
||||
{
|
||||
chartAvailable: true,
|
||||
id: 'customer_gender',
|
||||
|
@ -186,7 +211,15 @@ export default function ({ getService }: FtrProviderContext) {
|
|||
{ color: '#000000', percentage: 60 },
|
||||
],
|
||||
},
|
||||
{ chartAvailable: false, id: 'customer_last_name', legend: 'Chart not supported.' },
|
||||
{
|
||||
chartAvailable: true,
|
||||
id: 'customer_last_name',
|
||||
legend: 'top 20 of 183 categories',
|
||||
colorStats: [
|
||||
{ color: '#000000', percentage: 25 },
|
||||
{ color: '#54B399', percentage: 70 },
|
||||
],
|
||||
},
|
||||
{
|
||||
chartAvailable: true,
|
||||
id: 'customer_phone',
|
||||
|
@ -403,6 +436,9 @@ export default function ({ getService }: FtrProviderContext) {
|
|||
await transform.wizard.assertAdvancedQueryEditorSwitchExists();
|
||||
await transform.wizard.assertAdvancedQueryEditorSwitchCheckState(false);
|
||||
|
||||
// Disable anti-aliasing to stabilize canvas image rendering assertions
|
||||
await canvasElement.disableAntiAliasing();
|
||||
|
||||
await transform.testExecution.logTestStep('enables the index preview histogram charts');
|
||||
await transform.wizard.enableIndexPreviewHistogramCharts(true);
|
||||
|
||||
|
@ -415,6 +451,8 @@ export default function ({ getService }: FtrProviderContext) {
|
|||
);
|
||||
}
|
||||
|
||||
await canvasElement.resetAntiAliasing();
|
||||
|
||||
if (isPivotTransformTestData(testData)) {
|
||||
await transform.testExecution.logTestStep('adds the group by entries');
|
||||
for (const [index, entry] of testData.groupByEntries.entries()) {
|
||||
|
|
|
@ -53,7 +53,15 @@ export default function ({ getService }: FtrProviderContext) {
|
|||
chartAvailable: true,
|
||||
id: '@timestamp',
|
||||
},
|
||||
{ chartAvailable: false, id: '@version', legend: 'Chart not supported.' },
|
||||
{
|
||||
chartAvailable: true,
|
||||
id: '@version',
|
||||
legend: '1 category',
|
||||
colorStats: [
|
||||
{ color: '#000000', percentage: 10 },
|
||||
{ color: '#54B399', percentage: 90 },
|
||||
],
|
||||
},
|
||||
{
|
||||
chartAvailable: true,
|
||||
id: 'airline',
|
||||
|
@ -67,7 +75,8 @@ export default function ({ getService }: FtrProviderContext) {
|
|||
chartAvailable: true,
|
||||
id: 'responsetime',
|
||||
colorStats: [
|
||||
{ color: '#54B399', percentage: 5 },
|
||||
// below 10% threshold
|
||||
// { color: '#54B399', percentage: 5 },
|
||||
{ color: '#000000', percentage: 95 },
|
||||
],
|
||||
},
|
||||
|
@ -84,11 +93,20 @@ export default function ({ getService }: FtrProviderContext) {
|
|||
chartAvailable: true,
|
||||
id: 'rt_responsetime_x_2',
|
||||
colorStats: [
|
||||
{ color: '#54B399', percentage: 5 },
|
||||
// below 10% threshold
|
||||
// { color: '#54B399', percentage: 5 },
|
||||
{ color: '#000000', percentage: 95 },
|
||||
],
|
||||
},
|
||||
{ chartAvailable: false, id: 'type', legend: 'Chart not supported.' },
|
||||
{
|
||||
chartAvailable: true,
|
||||
id: 'type',
|
||||
legend: '1 category',
|
||||
colorStats: [
|
||||
{ color: '#000000', percentage: 10 },
|
||||
{ color: '#54B399', percentage: 90 },
|
||||
],
|
||||
},
|
||||
];
|
||||
|
||||
const testDataList: Array<PivotTransformTestData | LatestTransformTestData> = [
|
||||
|
|
|
@ -20,7 +20,7 @@ function getTransformConfig(): TransformPivotConfig {
|
|||
aggregations: { 'products.base_price.avg': { avg: { field: 'products.base_price' } } },
|
||||
},
|
||||
description:
|
||||
'ecommerce batch transform with avg(products.base_price) grouped by terms(category.keyword)',
|
||||
'ecommerce batch transform with avg(products.base_price) grouped by terms(category)',
|
||||
dest: { index: `user-ec_2_${date}` },
|
||||
};
|
||||
}
|
||||
|
|
|
@ -262,12 +262,18 @@ export function TransformWizardProvider({ getService, getPageObjects }: FtrProvi
|
|||
`[data-test-subj="mlDataGridChart-${id}-histogram"] .echCanvasRenderer`,
|
||||
sortedExpectedColorStats,
|
||||
undefined,
|
||||
4
|
||||
10
|
||||
);
|
||||
|
||||
expect(actualColorStats.length).to.eql(
|
||||
sortedExpectedColorStats.length,
|
||||
`Expected and actual color stats for column '${expected.id}' should have the same amount of elements. Expected: ${sortedExpectedColorStats.length} (got ${actualColorStats.length})`
|
||||
`Expected and actual color stats for column '${
|
||||
expected.id
|
||||
}' should have the same amount of elements. Expected: ${
|
||||
sortedExpectedColorStats.length
|
||||
} '${JSON.stringify(sortedExpectedColorStats)}' (got ${
|
||||
actualColorStats.length
|
||||
} '${JSON.stringify(actualColorStats)}')`
|
||||
);
|
||||
expect(actualColorStats.every((d) => d.withinTolerance)).to.eql(
|
||||
true,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue