[8.x] [ML] Transforms: Improve data grid memoization. (#195394) (#195975)

# Backport

This will backport the following commits from `main` to `8.x`:
- [[ML] Transforms: Improve data grid memoization.
(#195394)](https://github.com/elastic/kibana/pull/195394)

<!--- Backport version: 9.4.3 -->

### Questions ?
Please refer to the [Backport tool
documentation](https://github.com/sqren/backport)

<!--BACKPORT [{"author":{"name":"Walter
Rafelsberger","email":"walter.rafelsberger@elastic.co"},"sourceCommit":{"committedDate":"2024-10-11T18:18:11Z","message":"[ML]
Transforms: Improve data grid memoization. (#195394)\n\n##
Summary\r\n\r\nPart of #178606 and #151664.\r\n\r\n- Removes some unused
code related to identifying populated index\r\nfields.\r\n- Changes
`useIndexData()` to accept just one config options arg instead\r\nof
individual args.\r\n- Improves data grid
memoziation.\r\n\r\nImprovements tested locally:\r\n\r\n####
`many_fields` dataset (no timestamp)\r\n\r\n- `main`: `~22s` and 10 data
grid rerenders until many_fields data set\r\nloaded. The transform
config dropdown are hardly usable and super slow,\r\neach edit causes 3
data grid rerenders.\r\n- This PR: `~4.5s` and 7 data grid rerenders
until many_fields data set\r\nloaded. The transform config dropdowns are
a bit slow but usable!\r\n\r\n#### `kibana_sample_data_logs` dataset
(whole dataset in the past to\r\ntest rerenders on load without
data)\r\n\r\n- `main`: 5 rerenders.\r\n- This PR: 3 rerenders\r\n\r\n###
Checklist\r\n\r\n- [x] [Unit or
functional\r\ntests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)\r\nwere
updated or added to match the most common scenarios\r\n- [ ] [Flaky
Test\r\nRunner](https://ci-stats.kibana.dev/trigger_flaky_test_runner/1)
was\r\nused on any tests changed\r\n- [x] This was checked for breaking
API changes and was
[labeled\r\nappropriately](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process)","sha":"869ceec5ca8a1156d077bb2a888a91ef73e30511","branchLabelMapping":{"^v9.0.0$":"main","^v8.16.0$":"8.x","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":[":ml","release_note:skip","Feature:Transforms","v9.0.0","v8.16.0","backport:version"],"title":"[ML]
Transforms: Improve data grid
memoization.","number":195394,"url":"https://github.com/elastic/kibana/pull/195394","mergeCommit":{"message":"[ML]
Transforms: Improve data grid memoization. (#195394)\n\n##
Summary\r\n\r\nPart of #178606 and #151664.\r\n\r\n- Removes some unused
code related to identifying populated index\r\nfields.\r\n- Changes
`useIndexData()` to accept just one config options arg instead\r\nof
individual args.\r\n- Improves data grid
memoziation.\r\n\r\nImprovements tested locally:\r\n\r\n####
`many_fields` dataset (no timestamp)\r\n\r\n- `main`: `~22s` and 10 data
grid rerenders until many_fields data set\r\nloaded. The transform
config dropdown are hardly usable and super slow,\r\neach edit causes 3
data grid rerenders.\r\n- This PR: `~4.5s` and 7 data grid rerenders
until many_fields data set\r\nloaded. The transform config dropdowns are
a bit slow but usable!\r\n\r\n#### `kibana_sample_data_logs` dataset
(whole dataset in the past to\r\ntest rerenders on load without
data)\r\n\r\n- `main`: 5 rerenders.\r\n- This PR: 3 rerenders\r\n\r\n###
Checklist\r\n\r\n- [x] [Unit or
functional\r\ntests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)\r\nwere
updated or added to match the most common scenarios\r\n- [ ] [Flaky
Test\r\nRunner](https://ci-stats.kibana.dev/trigger_flaky_test_runner/1)
was\r\nused on any tests changed\r\n- [x] This was checked for breaking
API changes and was
[labeled\r\nappropriately](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process)","sha":"869ceec5ca8a1156d077bb2a888a91ef73e30511"}},"sourceBranch":"main","suggestedTargetBranches":["8.x"],"targetPullRequestStates":[{"branch":"main","label":"v9.0.0","branchLabelMappingKey":"^v9.0.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/195394","number":195394,"mergeCommit":{"message":"[ML]
Transforms: Improve data grid memoization. (#195394)\n\n##
Summary\r\n\r\nPart of #178606 and #151664.\r\n\r\n- Removes some unused
code related to identifying populated index\r\nfields.\r\n- Changes
`useIndexData()` to accept just one config options arg instead\r\nof
individual args.\r\n- Improves data grid
memoziation.\r\n\r\nImprovements tested locally:\r\n\r\n####
`many_fields` dataset (no timestamp)\r\n\r\n- `main`: `~22s` and 10 data
grid rerenders until many_fields data set\r\nloaded. The transform
config dropdown are hardly usable and super slow,\r\neach edit causes 3
data grid rerenders.\r\n- This PR: `~4.5s` and 7 data grid rerenders
until many_fields data set\r\nloaded. The transform config dropdowns are
a bit slow but usable!\r\n\r\n#### `kibana_sample_data_logs` dataset
(whole dataset in the past to\r\ntest rerenders on load without
data)\r\n\r\n- `main`: 5 rerenders.\r\n- This PR: 3 rerenders\r\n\r\n###
Checklist\r\n\r\n- [x] [Unit or
functional\r\ntests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)\r\nwere
updated or added to match the most common scenarios\r\n- [ ] [Flaky
Test\r\nRunner](https://ci-stats.kibana.dev/trigger_flaky_test_runner/1)
was\r\nused on any tests changed\r\n- [x] This was checked for breaking
API changes and was
[labeled\r\nappropriately](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process)","sha":"869ceec5ca8a1156d077bb2a888a91ef73e30511"}},{"branch":"8.x","label":"v8.16.0","branchLabelMappingKey":"^v8.16.0$","isSourceBranch":false,"state":"NOT_CREATED"}]}]
BACKPORT-->

Co-authored-by: Walter Rafelsberger <walter.rafelsberger@elastic.co>
This commit is contained in:
Kibana Machine 2024-10-12 06:59:01 +11:00 committed by GitHub
parent bf56b4ea3a
commit c9b0d86381
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 163 additions and 175 deletions

View file

@ -154,7 +154,9 @@ export const DataGrid: FC<Props> = memo(
const wrapperEl = useRef<HTMLDivElement>(null);
if (status === INDEX_STATUS.LOADED && data.length === 0) {
// `UNUSED` occurs if we were not able to identify populated fields, because
// then the query to fetch actual docs would not have triggered yet.
if ((status === INDEX_STATUS.UNUSED || status === INDEX_STATUS.LOADED) && data.length === 0) {
return (
<div data-test-subj={`${dataTestSubj} empty`}>
{isWithHeader(props) && <DataGridTitle title={props.title} />}

View file

@ -168,35 +168,54 @@ export const useDataGrid = (
}
}, [chartsVisible, rowCount, rowCountRelation]);
return {
ccsWarning,
chartsVisible,
chartsButtonVisible: true,
columnsWithCharts,
errorMessage,
invalidSortingColumnns,
noDataMessage,
onChangeItemsPerPage,
onChangePage,
onSort,
pagination,
resetPagination,
rowCount,
rowCountRelation,
setColumnCharts,
setCcsWarning,
setErrorMessage,
setNoDataMessage,
setPagination,
setRowCountInfo,
setSortingColumns,
setStatus,
setTableItems,
setVisibleColumns,
sortingColumns,
status,
tableItems,
toggleChartVisibility,
visibleColumns,
};
return useMemo(
() => ({
ccsWarning,
chartsVisible,
chartsButtonVisible: true,
columnsWithCharts,
errorMessage,
invalidSortingColumnns,
noDataMessage,
onChangeItemsPerPage,
onChangePage,
onSort,
pagination,
resetPagination,
rowCount,
rowCountRelation,
setColumnCharts,
setCcsWarning,
setErrorMessage,
setNoDataMessage,
setPagination,
setRowCountInfo,
setSortingColumns,
setStatus,
setTableItems,
setVisibleColumns,
sortingColumns,
status,
tableItems,
toggleChartVisibility,
visibleColumns,
}),
// custom comparison
// eslint-disable-next-line react-hooks/exhaustive-deps
[
ccsWarning,
chartsVisible,
columnsWithCharts,
errorMessage,
invalidSortingColumnns,
noDataMessage,
pagination,
rowCount,
rowCountRelation,
sortingColumns,
status,
tableItems,
visibleColumns,
]
);
};

View file

@ -67,6 +67,9 @@ dataStart.search.search = jest.fn(({ params }: IKibanaSearchRequest) => {
});
}) as ISearchGeneric;
// Replace mock to support tests for `use_index_data`.
coreSetup.http.post = jest.fn().mockResolvedValue([]);
const appDependencies: AppDependencies = {
analytics: coreStart.analytics,
application: coreStart.application,

View file

@ -12,7 +12,7 @@ import { renderHook } from '@testing-library/react-hooks';
import { __IntlProvider as IntlProvider } from '@kbn/i18n-react';
import type { CoreSetup } from '@kbn/core/public';
import { DataGrid, type UseIndexDataReturnType } from '@kbn/ml-data-grid';
import { DataGrid, type UseIndexDataReturnType, INDEX_STATUS } from '@kbn/ml-data-grid';
import type { RuntimeMappings } from '@kbn/ml-runtime-field-utils';
import type { SimpleQuery } from '@kbn/ml-query-utils';
@ -40,6 +40,37 @@ const runtimeMappings: RuntimeMappings = {
const queryClient = new QueryClient();
describe('Transform: useIndexData()', () => {
test('empty populatedFields does not trigger loading', async () => {
const wrapper: FC<PropsWithChildren<unknown>> = ({ children }) => (
<QueryClientProvider client={queryClient}>
<IntlProvider locale="en">{children}</IntlProvider>
</QueryClientProvider>
);
const { result, waitForNextUpdate } = renderHook(
() =>
useIndexData({
dataView: {
id: 'the-id',
getIndexPattern: () => 'the-index-pattern',
fields: [],
} as unknown as SearchItems['dataView'],
query,
combinedRuntimeMappings: runtimeMappings,
populatedFields: [],
}),
{ wrapper }
);
const IndexObj: UseIndexDataReturnType = result.current;
await waitForNextUpdate();
expect(IndexObj.errorMessage).toBe('');
expect(IndexObj.status).toBe(INDEX_STATUS.UNUSED);
expect(IndexObj.tableItems).toEqual([]);
});
test('dataView set triggers loading', async () => {
const wrapper: FC<PropsWithChildren<unknown>> = ({ children }) => (
<QueryClientProvider client={queryClient}>
@ -49,15 +80,29 @@ describe('Transform: useIndexData()', () => {
const { result, waitForNextUpdate } = renderHook(
() =>
useIndexData(
{
useIndexData({
dataView: {
id: 'the-id',
getIndexPattern: () => 'the-index-pattern',
fields: [],
metaFields: [],
// minimal mock of DataView fields (array with getByName method)
fields: new (class DataViewFields extends Array<{ name: string }> {
getByName(id: string) {
return this.find((d) => d.name === id);
}
})(
{
name: 'the-populated-field',
},
{
name: 'the-unpopulated-field',
}
),
} as unknown as SearchItems['dataView'],
query,
runtimeMappings
),
combinedRuntimeMappings: runtimeMappings,
populatedFields: ['the-populated-field'],
}),
{ wrapper }
);
@ -66,7 +111,7 @@ describe('Transform: useIndexData()', () => {
await waitForNextUpdate();
expect(IndexObj.errorMessage).toBe('');
expect(IndexObj.status).toBe(1);
expect(IndexObj.status).toBe(INDEX_STATUS.LOADING);
expect(IndexObj.tableItems).toEqual([]);
});
});
@ -81,7 +126,12 @@ describe('Transform: <DataGrid /> with useIndexData()', () => {
const Wrapper = () => {
const props = {
...useIndexData(dataView, { match_all: {} }, runtimeMappings),
...useIndexData({
dataView,
query: { match_all: {} },
combinedRuntimeMappings: runtimeMappings,
populatedFields: ['the-populated-field'],
}),
copyToClipboard: 'the-copy-to-clipboard-code',
copyToClipboardDescription: 'the-copy-to-clipboard-description',
dataTestSubj: 'the-data-test-subj',
@ -109,42 +159,4 @@ describe('Transform: <DataGrid /> with useIndexData()', () => {
).not.toBeInTheDocument();
});
});
test('Cross-cluster search warning', async () => {
// Arrange
const dataView = {
getIndexPattern: () => 'remote:the-index-pattern-title',
fields: [] as any[],
} as SearchItems['dataView'];
const Wrapper = () => {
const props = {
...useIndexData(dataView, { match_all: {} }, runtimeMappings),
copyToClipboard: 'the-copy-to-clipboard-code',
copyToClipboardDescription: 'the-copy-to-clipboard-description',
dataTestSubj: 'the-data-test-subj',
title: 'the-index-preview-title',
toastNotifications: {} as CoreSetup['notifications']['toasts'],
};
return <DataGrid {...props} />;
};
render(
<QueryClientProvider client={queryClient}>
<IntlProvider locale="en">
<Wrapper />
</IntlProvider>
</QueryClientProvider>
);
// Act
// Assert
await waitFor(() => {
expect(screen.queryByText('the-index-preview-title')).toBeInTheDocument();
expect(
screen.queryByText('Cross-cluster search returned no fields data.')
).toBeInTheDocument();
});
});
});

View file

@ -43,13 +43,16 @@ import type { SearchItems } from './use_search_items';
import { useGetHistogramsForFields } from './use_get_histograms_for_fields';
import { useDataSearch } from './use_data_search';
export const useIndexData = (
dataView: SearchItems['dataView'],
query: TransformConfigQuery,
combinedRuntimeMappings?: StepDefineExposedState['runtimeMappings'],
timeRangeMs?: TimeRangeMs,
populatedFields?: string[]
): UseIndexDataReturnType => {
interface UseIndexDataOptions {
dataView: SearchItems['dataView'];
query: TransformConfigQuery;
populatedFields: string[];
combinedRuntimeMappings?: StepDefineExposedState['runtimeMappings'];
timeRangeMs?: TimeRangeMs;
}
export const useIndexData = (options: UseIndexDataOptions): UseIndexDataReturnType => {
const { dataView, query, populatedFields, combinedRuntimeMappings, timeRangeMs } = options;
const { analytics } = useAppDependencies();
// Store the performance metric's start time using a ref
@ -59,11 +62,10 @@ export const useIndexData = (
const indexPattern = useMemo(() => dataView.getIndexPattern(), [dataView]);
const toastNotifications = useToastNotifications();
const baseFilterCriteria = buildBaseFilterCriteria(
dataView.timeFieldName,
timeRangeMs?.from,
timeRangeMs?.to,
query
const baseFilterCriteria = useMemo(
() =>
buildBaseFilterCriteria(dataView.timeFieldName, timeRangeMs?.from, timeRangeMs?.to, query),
[dataView.timeFieldName, query, timeRangeMs]
);
const defaultQuery = useMemo(
@ -71,85 +73,23 @@ export const useIndexData = (
[baseFilterCriteria, dataView, timeRangeMs]
);
const queryWithBaseFilterCriteria = {
bool: {
filter: baseFilterCriteria,
},
};
// Fetch 500 random documents to determine populated fields.
// This is a workaround to avoid passing potentially thousands of unpopulated fields
// (for example, as part of filebeat/metricbeat/ECS based indices)
// to the data grid component which would significantly slow down the page.
const {
error: dataViewFieldsError,
data: dataViewFieldsData,
isError: dataViewFieldsIsError,
isLoading: dataViewFieldsIsLoading,
} = useDataSearch(
{
index: indexPattern,
body: {
fields: ['*'],
_source: false,
query: {
function_score: {
query: defaultQuery,
random_score: {},
},
},
size: 500,
const queryWithBaseFilterCriteria = useMemo(
() => ({
bool: {
filter: baseFilterCriteria,
},
},
// Check whether fetching should be enabled
// If populatedFields are not provided, make own request to calculate
!Array.isArray(populatedFields) &&
!(dataView.timeFieldName !== undefined && timeRangeMs === undefined)
}),
[baseFilterCriteria]
);
useEffect(() => {
if (dataViewFieldsIsLoading && !dataViewFieldsIsError) {
setErrorMessage('');
setStatus(INDEX_STATUS.LOADING);
} else if (dataViewFieldsError !== null) {
setErrorMessage(getErrorMessage(dataViewFieldsError));
setStatus(INDEX_STATUS.ERROR);
} else if (
!dataViewFieldsIsLoading &&
!dataViewFieldsIsError &&
dataViewFieldsData !== undefined
) {
const isCrossClusterSearch = indexPattern.includes(':');
const isMissingFields = dataViewFieldsData.hits.hits.every(
(d) => typeof d.fields === 'undefined'
);
setCcsWarning(isCrossClusterSearch && isMissingFields);
setStatus(INDEX_STATUS.LOADED);
}
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [dataViewFieldsData, dataViewFieldsError, dataViewFieldsIsError, dataViewFieldsIsLoading]);
const dataViewFields = useMemo(() => {
let allPopulatedFields = Array.isArray(populatedFields) ? populatedFields : [];
if (populatedFields === undefined && dataViewFieldsData) {
// Get all field names for each returned doc and flatten it
// to a list of unique field names used across all docs.
const docs = dataViewFieldsData.hits.hits.map((d) => getProcessedFields(d.fields ?? {}));
allPopulatedFields = [...new Set(docs.map(Object.keys).flat(1))];
}
const allPopulatedFields = Array.isArray(populatedFields) ? populatedFields : [];
const allDataViewFields = getFieldsFromKibanaDataView(dataView);
return allPopulatedFields.filter((d) => allDataViewFields.includes(d)).sort();
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [dataViewFieldsData, populatedFields]);
}, [populatedFields]);
const columns: EuiDataGridColumn[] = useMemo(() => {
if (typeof dataViewFields === 'undefined') {
return [];
}
let result: Array<{ id: string; schema: string | undefined }> = [];
// Get the the runtime fields that are defined from API field and index patterns
@ -206,7 +146,9 @@ export const useIndexData = (
error: dataGridDataError,
data: dataGridData,
isError: dataGridDataIsError,
isLoading: dataGridDataIsLoading,
// React Query v4 has the weird behavior that `isLoading` is `true` on load
// when a query is disabled, that's why we need to use `isFetching` here.
isFetching: dataGridDataIsLoading,
} = useDataSearch(
{
index: indexPattern,
@ -223,7 +165,7 @@ export const useIndexData = (
},
},
// Check whether fetching should be enabled
dataViewFields !== undefined
dataViewFields.length > 0
);
useEffect(() => {
@ -309,7 +251,7 @@ export const useIndexData = (
if (
dataGrid.status === INDEX_STATUS.LOADED &&
dataViewFields !== undefined &&
dataViewFields.length > 0 &&
Array.isArray(histogramsForFieldsData) &&
histogramsForFieldsData.length > 0 &&
loadIndexDataStartTime.current !== undefined
@ -325,8 +267,11 @@ export const useIndexData = (
});
}
return {
...dataGrid,
renderCellValue,
};
return useMemo(
() => ({
...dataGrid,
renderCellValue,
}),
[dataGrid, renderCellValue]
);
};

View file

@ -126,16 +126,23 @@ export const StepDefineForm: FC<StepDefineFormProps> = React.memo((props) => {
const { runtimeMappings } = stepDefineForm.runtimeMappingsEditor.state;
const fieldStatsContext = useFieldStatsFlyoutContext();
const indexPreviewProps = {
...useIndexData(
dataView,
transformConfigQuery,
runtimeMappings,
timeRangeMs,
const populatedFields = useMemo(
() =>
isPopulatedFields(fieldStatsContext?.populatedFields)
? [...fieldStatsContext.populatedFields]
: []
),
: [],
[fieldStatsContext?.populatedFields]
);
const indexPreviewProps = {
...useIndexData({
dataView,
query: transformConfigQuery,
populatedFields,
combinedRuntimeMappings: runtimeMappings,
timeRangeMs,
}),
dataTestSubj: 'transformIndexPreview',
toastNotifications,
};