mirror of
https://github.com/elastic/kibana.git
synced 2025-04-23 17:28:26 -04:00
[ML] Explain Log Rate Spikes: Additional unit tests. (#147451)
This breaks out inline code in `x-pack/plugins/aiops/server/routes/explain_log_rate_spikes.ts` into functions in separate files and adds jest unit tests for each function. The mocks used for the jest unit tests are also used as expected data in the API integration tests. This makes it possible to verify that the static mocks are still up to date should we have to update the API integration tests based on upstream tests.
This commit is contained in:
parent
36978389a0
commit
d7be514b94
47 changed files with 1287 additions and 527 deletions
|
@ -0,0 +1,20 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import type { ChangePointGroup } from '@kbn/ml-agg-utils';
|
||||
|
||||
/**
 * Mock data: a single change point group made up of the field/value pairs
 * `response_code: '500'` and `url: 'home.php'`. The group items carry no
 * `duplicate` flag.
 *
 * NOTE(review): the file path is not visible in this view; presumably one of the
 * `common/__mocks__/artificial_logs` fixtures. Confirm before relying on it.
 */
export const changePointGroups: ChangePointGroup[] = [
|
||||
{
|
||||
id: '2038579476',
|
||||
group: [
|
||||
{ fieldName: 'response_code', fieldValue: '500' },
|
||||
{ fieldName: 'url', fieldValue: 'home.php' },
|
||||
],
|
||||
docCount: 792,
|
||||
pValue: 0.010770456205312423,
|
||||
},
|
||||
];
|
|
@ -0,0 +1,53 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
/**
 * Mock change points. Each entry describes one field/value pair together with
 * its counts (`doc_count`, `bg_count`, `total_doc_count`, `total_bg_count`) and
 * its `score`, `pValue` and `normalizedScore`.
 *
 * A `changePoints` export is imported by the `getChangePointGroups` unit test and
 * passed as its `changePoints` argument.
 */
export const changePoints = [
|
||||
{
|
||||
fieldName: 'response_code',
|
||||
fieldValue: '500',
|
||||
doc_count: 1821,
|
||||
bg_count: 553,
|
||||
total_doc_count: 4671,
|
||||
total_bg_count: 1975,
|
||||
score: 26.546201745993947,
|
||||
pValue: 2.9589053032077285e-12,
|
||||
normalizedScore: 0.7814127409489161,
|
||||
},
|
||||
{
|
||||
fieldName: 'url',
|
||||
fieldValue: 'home.php',
|
||||
doc_count: 1742,
|
||||
bg_count: 632,
|
||||
total_doc_count: 4671,
|
||||
total_bg_count: 1975,
|
||||
score: 4.53094842981472,
|
||||
pValue: 0.010770456205312423,
|
||||
normalizedScore: 0.10333028878375965,
|
||||
},
|
||||
// Same counts, score and pValue as the `url: 'home.php'` entry above; only
// the field value differs.
{
|
||||
fieldName: 'url',
|
||||
fieldValue: 'login.php',
|
||||
doc_count: 1742,
|
||||
bg_count: 632,
|
||||
total_doc_count: 4671,
|
||||
total_bg_count: 1975,
|
||||
score: 4.53094842981472,
|
||||
pValue: 0.010770456205312423,
|
||||
normalizedScore: 0.10333028878375965,
|
||||
},
|
||||
{
|
||||
fieldName: 'user',
|
||||
fieldValue: 'Peter',
|
||||
doc_count: 1981,
|
||||
bg_count: 553,
|
||||
total_doc_count: 4671,
|
||||
total_bg_count: 1975,
|
||||
score: 47.34435085428873,
|
||||
pValue: 2.7454255728359757e-21,
|
||||
normalizedScore: 0.8327337555873047,
|
||||
},
|
||||
];
|
|
@ -0,0 +1,8 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
/**
 * Mock list of field names. A `fields` export is imported by the
 * `getChangePointGroups` unit test and passed as its `fields` argument.
 */
export const fields = ['response_code', 'url', 'user'];
|
|
@ -0,0 +1,27 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import type { ItemsetResult } from '../../types';
|
||||
|
||||
/**
 * Mock item sets of size 2. Both sets contain `url: 'home.php'`, combined with
 * `response_code: '500'` and `user: 'Peter'` respectively.
 *
 * NOTE(review): presumably the expected result of filtering the `frequentItems`
 * mock against the mock change points; confirm against the test that imports it.
 */
export const filteredFrequentItems: ItemsetResult[] = [
|
||||
{
|
||||
set: { response_code: '500', url: 'home.php' },
|
||||
size: 2,
|
||||
maxPValue: 0.010770456205312423,
|
||||
doc_count: 792,
|
||||
support: 0.5262458471760797,
|
||||
total_doc_count: 1505,
|
||||
},
|
||||
{
|
||||
set: { user: 'Peter', url: 'home.php' },
|
||||
size: 2,
|
||||
maxPValue: 0.010770456205312423,
|
||||
doc_count: 634,
|
||||
support: 0.4212624584717608,
|
||||
total_doc_count: 1505,
|
||||
},
|
||||
];
|
|
@ -0,0 +1,27 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import type { ChangePointGroup } from '@kbn/ml-agg-utils';
|
||||
|
||||
/**
 * Mock change point groups. The unit tests use this both as the expected return
 * value of `getChangePointGroups(frequentItems, changePoints, fields)` and as the
 * input of `getGroupTableItems`. Every group item is flagged `duplicate: false`.
 */
export const finalChangePointGroups: ChangePointGroup[] = [
|
||||
{
|
||||
id: '2038579476',
|
||||
group: [
|
||||
{ fieldName: 'response_code', fieldValue: '500', duplicate: false },
|
||||
{ fieldName: 'url', fieldValue: 'home.php', duplicate: false },
|
||||
{ fieldName: 'url', fieldValue: 'login.php', duplicate: false },
|
||||
],
|
||||
docCount: 792,
|
||||
pValue: 0.010770456205312423,
|
||||
},
|
||||
// Group consisting of a single field/value pair.
{
|
||||
id: '817080373',
|
||||
group: [{ fieldName: 'user', fieldValue: 'Peter', duplicate: false }],
|
||||
docCount: 1981,
|
||||
pValue: 2.7454255728359757e-21,
|
||||
},
|
||||
];
|
|
@ -0,0 +1,59 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import type { ItemsetResult } from '../../types';
|
||||
|
||||
/**
 * Mock item sets used as the `itemsets` input of the `getChangePointGroups` unit
 * test. Every set includes `url: 'home.php'`; the sets of size 3 additionally
 * combine it with one `response_code` value and one `user` value.
 */
export const frequentItems: ItemsetResult[] = [
|
||||
{
|
||||
set: { response_code: '500', url: 'home.php' },
|
||||
size: 2,
|
||||
maxPValue: 0.010770456205312423,
|
||||
doc_count: 792,
|
||||
support: 0.5262458471760797,
|
||||
total_doc_count: 1505,
|
||||
},
|
||||
{
|
||||
set: { user: 'Peter', url: 'home.php' },
|
||||
size: 2,
|
||||
maxPValue: 0.010770456205312423,
|
||||
doc_count: 634,
|
||||
support: 0.4212624584717608,
|
||||
total_doc_count: 1505,
|
||||
},
|
||||
{
|
||||
set: { response_code: '500', user: 'Mary', url: 'home.php' },
|
||||
size: 3,
|
||||
maxPValue: 0.010770456205312423,
|
||||
doc_count: 396,
|
||||
support: 0.26312292358803985,
|
||||
total_doc_count: 1505,
|
||||
},
|
||||
{
|
||||
set: { response_code: '500', user: 'Paul', url: 'home.php' },
|
||||
size: 3,
|
||||
maxPValue: 0.010770456205312423,
|
||||
doc_count: 396,
|
||||
support: 0.26312292358803985,
|
||||
total_doc_count: 1505,
|
||||
},
|
||||
{
|
||||
set: { response_code: '404', user: 'Peter', url: 'home.php' },
|
||||
size: 3,
|
||||
maxPValue: 0.010770456205312423,
|
||||
doc_count: 317,
|
||||
support: 0.2106312292358804,
|
||||
total_doc_count: 1505,
|
||||
},
|
||||
{
|
||||
set: { response_code: '200', user: 'Peter', url: 'home.php' },
|
||||
size: 3,
|
||||
maxPValue: 0.010770456205312423,
|
||||
doc_count: 317,
|
||||
support: 0.2106312292358804,
|
||||
total_doc_count: 1505,
|
||||
},
|
||||
];
|
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import type { ChangePointGroup } from '@kbn/ml-agg-utils';
|
||||
|
||||
/**
 * Mock data with two change point groups (`group-1` and `group-2`). Both groups
 * share the pair `custom_field.keyword: 'deviation'` and differ in their
 * `airline` value (`UAL` vs. `AAL`).
 *
 * NOTE(review): neither the file path nor the underlying dataset is visible in
 * this view; confirm where this fixture is consumed.
 */
export const changePointGroups: ChangePointGroup[] = [
|
||||
{
|
||||
id: 'group-1',
|
||||
group: [
|
||||
{
|
||||
fieldName: 'custom_field.keyword',
|
||||
fieldValue: 'deviation',
|
||||
},
|
||||
{
|
||||
fieldName: 'airline',
|
||||
fieldValue: 'UAL',
|
||||
},
|
||||
],
|
||||
docCount: 101,
|
||||
pValue: 0.01,
|
||||
},
|
||||
{
|
||||
id: 'group-2',
|
||||
group: [
|
||||
{
|
||||
fieldName: 'custom_field.keyword',
|
||||
fieldValue: 'deviation',
|
||||
},
|
||||
{
|
||||
fieldName: 'airline',
|
||||
fieldValue: 'AAL',
|
||||
},
|
||||
],
|
||||
docCount: 49,
|
||||
pValue: 0.001,
|
||||
},
|
||||
];
|
33
x-pack/plugins/aiops/common/types.ts
Normal file
33
x-pack/plugins/aiops/common/types.ts
Normal file
|
@ -0,0 +1,33 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import type { ChangePoint, FieldValuePair } from '@kbn/ml-agg-utils';
|
||||
|
||||
/**
 * A bucket of change points, used as the element type of the list that
 * `groupDuplicates` builds.
 */
export interface ChangePointDuplicateGroup {
|
||||
// NOTE(review): presumably the attribute values shared by all members of
// `group`; confirm against `groupDuplicates`.
keys: Pick<ChangePoint, keyof ChangePoint>;
|
||||
// The change points collected in this bucket.
group: ChangePoint[];
|
||||
}
|
||||
|
||||
/**
 * Two-level lookup from string keys to a number.
 * NOTE(review): presumably field name -> field value -> count; confirm against
 * `getFieldValuePairCounts`.
 */
export type FieldValuePairCounts = Record<string, Record<string, number>>;
|
||||
|
||||
/**
 * One item set: a combination of field/value pairs together with its statistics.
 */
export interface ItemsetResult {
|
||||
// The field/value pairs of the item set, keyed by field name.
set: Record<FieldValuePair['fieldName'], FieldValuePair['fieldValue']>;
|
||||
// Number of field/value pairs in `set`.
size: number;
|
||||
// NOTE(review): presumably the largest p-value among the pairs in `set`; confirm.
maxPValue: number;
|
||||
doc_count: number;
|
||||
// NOTE(review): in the mock data this equals `doc_count / total_doc_count`;
// confirm that this holds in general.
support: number;
|
||||
total_doc_count: number;
|
||||
}
|
||||
|
||||
/**
 * A node of a tree: it carries a name, a list of field/value pairs with a
 * document count and an optional p-value, and its child nodes.
 */
export interface SimpleHierarchicalTreeNode {
|
||||
name: string;
|
||||
// Field/value pairs associated with this node.
set: FieldValuePair[];
|
||||
docCount: number;
|
||||
// `null` is allowed when no p-value is available for the node.
pValue: number | null;
|
||||
children: SimpleHierarchicalTreeNode[];
|
||||
// NOTE(review): presumably appends `node` to `children`; the implementation is
// not visible here, so confirm.
addNode: (node: SimpleHierarchicalTreeNode) => void;
|
||||
}
|
|
@ -7,7 +7,7 @@
|
|||
|
||||
import type { ChangePoint } from '@kbn/ml-agg-utils';
|
||||
|
||||
import type { GroupTableItem } from '../../components/spike_analysis_table/spike_analysis_table_groups';
|
||||
import type { GroupTableItem } from '../../components/spike_analysis_table/types';
|
||||
|
||||
import { buildBaseFilterCriteria } from './query_utils';
|
||||
|
||||
|
|
|
@ -16,7 +16,7 @@ import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
|
|||
import { Query } from '@kbn/es-query';
|
||||
import { isPopulatedObject } from '@kbn/ml-is-populated-object';
|
||||
import type { ChangePoint, FieldValuePair } from '@kbn/ml-agg-utils';
|
||||
import type { GroupTableItem } from '../../components/spike_analysis_table/spike_analysis_table_groups';
|
||||
import type { GroupTableItem } from '../../components/spike_analysis_table/types';
|
||||
|
||||
/*
|
||||
* Contains utility functions for building and processing queries.
|
||||
|
|
|
@ -25,14 +25,17 @@ import type { WindowParameters } from '@kbn/aiops-utils';
|
|||
import { i18n } from '@kbn/i18n';
|
||||
import { FormattedMessage } from '@kbn/i18n-react';
|
||||
import type { Query } from '@kbn/es-query';
|
||||
import type { FieldValuePair } from '@kbn/ml-agg-utils';
|
||||
|
||||
import { useAiopsAppContext } from '../../hooks/use_aiops_app_context';
|
||||
import { initialState, streamReducer } from '../../../common/api/stream_reducer';
|
||||
import type { ApiExplainLogRateSpikes } from '../../../common/api';
|
||||
|
||||
import { SpikeAnalysisGroupsTable } from '../spike_analysis_table';
|
||||
import { SpikeAnalysisTable } from '../spike_analysis_table';
|
||||
import {
|
||||
getGroupTableItems,
|
||||
SpikeAnalysisTable,
|
||||
SpikeAnalysisGroupsTable,
|
||||
} from '../spike_analysis_table';
|
||||
import {} from '../spike_analysis_table';
|
||||
import { useSpikeAnalysisTableRowContext } from '../spike_analysis_table/spike_analysis_table_row_provider';
|
||||
|
||||
const groupResultsMessage = i18n.translate(
|
||||
|
@ -159,35 +162,10 @@ export const ExplainLogRateSpikesAnalysis: FC<ExplainLogRateSpikesAnalysisProps>
|
|||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, []);
|
||||
|
||||
const groupTableItems = useMemo(() => {
|
||||
const tableItems = data.changePointsGroups.map(({ id, group, docCount, histogram, pValue }) => {
|
||||
const sortedGroup = group.sort((a, b) =>
|
||||
a.fieldName > b.fieldName ? 1 : b.fieldName > a.fieldName ? -1 : 0
|
||||
);
|
||||
const dedupedGroup: FieldValuePair[] = [];
|
||||
const repeatedValues: FieldValuePair[] = [];
|
||||
|
||||
sortedGroup.forEach((pair) => {
|
||||
const { fieldName, fieldValue } = pair;
|
||||
if (pair.duplicate === false) {
|
||||
dedupedGroup.push({ fieldName, fieldValue });
|
||||
} else {
|
||||
repeatedValues.push({ fieldName, fieldValue });
|
||||
}
|
||||
});
|
||||
|
||||
return {
|
||||
id,
|
||||
docCount,
|
||||
pValue,
|
||||
group: dedupedGroup,
|
||||
repeatedValues,
|
||||
histogram,
|
||||
};
|
||||
});
|
||||
|
||||
return tableItems;
|
||||
}, [data.changePointsGroups]);
|
||||
const groupTableItems = useMemo(
|
||||
() => getGroupTableItems(data.changePointsGroups),
|
||||
[data.changePointsGroups]
|
||||
);
|
||||
|
||||
const shouldRerunAnalysis = useMemo(
|
||||
() =>
|
||||
|
|
|
@ -38,7 +38,7 @@ import { SearchPanel } from '../search_panel';
|
|||
|
||||
import { restorableDefaults } from './explain_log_rate_spikes_app_state';
|
||||
import { ExplainLogRateSpikesAnalysis } from './explain_log_rate_spikes_analysis';
|
||||
import type { GroupTableItem } from '../spike_analysis_table/spike_analysis_table_groups';
|
||||
import type { GroupTableItem } from '../spike_analysis_table/types';
|
||||
import { useSpikeAnalysisTableRowContext } from '../spike_analysis_table/spike_analysis_table_row_provider';
|
||||
|
||||
// TODO port to `@emotion/react` once `useEuiBreakpoint` is available https://github.com/elastic/eui/pull/6057
|
||||
|
|
|
@ -0,0 +1,53 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import { finalChangePointGroups } from '../../../common/__mocks__/artificial_logs/final_change_point_groups';
|
||||
|
||||
import { getGroupTableItems } from './get_group_table_items';
|
||||
|
||||
// Unit test for `getGroupTableItems`, using the `finalChangePointGroups` mock as input.
describe('getGroupTableItems', () => {
|
||||
it('transforms change point groups into table items', () => {
|
||||
const groupTableItems = getGroupTableItems(finalChangePointGroups);
|
||||
|
||||
// Every item of the mock groups has `duplicate: false`, so all pairs are
// expected in `group` (without the `duplicate` flag) and `repeatedValues`
// stays empty. The mock groups define no `histogram`, hence `undefined`.
expect(groupTableItems).toEqual([
|
||||
{
|
||||
docCount: 792,
|
||||
group: [
|
||||
{
|
||||
fieldName: 'response_code',
|
||||
fieldValue: '500',
|
||||
},
|
||||
{
|
||||
fieldName: 'url',
|
||||
fieldValue: 'home.php',
|
||||
},
|
||||
{
|
||||
fieldName: 'url',
|
||||
fieldValue: 'login.php',
|
||||
},
|
||||
],
|
||||
histogram: undefined,
|
||||
id: '2038579476',
|
||||
pValue: 0.010770456205312423,
|
||||
repeatedValues: [],
|
||||
},
|
||||
{
|
||||
docCount: 1981,
|
||||
group: [
|
||||
{
|
||||
fieldName: 'user',
|
||||
fieldValue: 'Peter',
|
||||
},
|
||||
],
|
||||
histogram: undefined,
|
||||
id: '817080373',
|
||||
pValue: 2.7454255728359757e-21,
|
||||
repeatedValues: [],
|
||||
},
|
||||
]);
|
||||
});
|
||||
});
|
|
@ -0,0 +1,40 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import type { ChangePointGroup, FieldValuePair } from '@kbn/ml-agg-utils';
|
||||
|
||||
import type { GroupTableItem } from './types';
|
||||
|
||||
/**
 * Transforms change point groups into the items consumed by the groups table.
 *
 * For every group, the field/value pairs are ordered by `fieldName` and split
 * into two lists: pairs explicitly flagged `duplicate: false` end up in `group`,
 * all other pairs (flagged `true` or carrying no flag at all) end up in
 * `repeatedValues`. The `duplicate` flag itself is dropped from the output.
 *
 * The input is not modified.
 *
 * @param changePointsGroups - The change point groups to transform.
 * @returns One table item per change point group, in the same order.
 */
export function getGroupTableItems(changePointsGroups: ChangePointGroup[]): GroupTableItem[] {
  return changePointsGroups.map(({ id, group, docCount, histogram, pValue }) => {
    // `Array.prototype.sort` sorts in place. Sort a copy so the caller's `group`
    // array (e.g. component state) is not reordered as a side effect.
    const sortedGroup = [...group].sort((a, b) =>
      a.fieldName > b.fieldName ? 1 : b.fieldName > a.fieldName ? -1 : 0
    );

    const dedupedGroup: FieldValuePair[] = [];
    const repeatedValues: FieldValuePair[] = [];

    for (const { fieldName, fieldValue, duplicate } of sortedGroup) {
      // Strict comparison on purpose: a missing flag counts as a repeated value.
      if (duplicate === false) {
        dedupedGroup.push({ fieldName, fieldValue });
      } else {
        repeatedValues.push({ fieldName, fieldValue });
      }
    }

    return {
      id,
      docCount,
      pValue,
      group: dedupedGroup,
      repeatedValues,
      histogram,
    };
  });
}
|
|
@ -5,5 +5,6 @@
|
|||
* 2.0.
|
||||
*/
|
||||
|
||||
export { getGroupTableItems } from './get_group_table_items';
|
||||
export { SpikeAnalysisTable } from './spike_analysis_table';
|
||||
export { SpikeAnalysisGroupsTable } from './spike_analysis_table_groups';
|
||||
|
|
|
@ -37,6 +37,7 @@ import { MiniHistogram } from '../mini_histogram';
|
|||
import { getFailedTransactionsCorrelationImpactLabel } from './get_failed_transactions_correlation_impact_label';
|
||||
import { SpikeAnalysisTable } from './spike_analysis_table';
|
||||
import { useSpikeAnalysisTableRowContext } from './spike_analysis_table_row_provider';
|
||||
import type { GroupTableItem } from './types';
|
||||
|
||||
const NARROW_COLUMN_WIDTH = '120px';
|
||||
const EXPAND_COLUMN_WIDTH = '40px';
|
||||
|
@ -54,15 +55,6 @@ const viewInDiscoverMessage = i18n.translate(
|
|||
}
|
||||
);
|
||||
|
||||
export interface GroupTableItem {
|
||||
id: string;
|
||||
docCount: number;
|
||||
pValue: number | null;
|
||||
group: FieldValuePair[];
|
||||
repeatedValues: FieldValuePair[];
|
||||
histogram: ChangePoint['histogram'];
|
||||
}
|
||||
|
||||
interface SpikeAnalysisTableProps {
|
||||
changePoints: ChangePoint[];
|
||||
groupTableItems: GroupTableItem[];
|
||||
|
|
|
@ -17,7 +17,7 @@ import React, {
|
|||
|
||||
import type { ChangePoint } from '@kbn/ml-agg-utils';
|
||||
|
||||
import type { GroupTableItem } from './spike_analysis_table_groups';
|
||||
import type { GroupTableItem } from './types';
|
||||
|
||||
type ChangePointOrNull = ChangePoint | null;
|
||||
type GroupOrNull = GroupTableItem | null;
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import type { ChangePoint, FieldValuePair } from '@kbn/ml-agg-utils';
|
||||
|
||||
/**
 * One row item of the groups table, as produced by `getGroupTableItems` from a
 * change point group.
 */
export interface GroupTableItem {
|
||||
id: string;
|
||||
docCount: number;
|
||||
pValue: number | null;
|
||||
// Field/value pairs of the group that were flagged `duplicate: false`.
group: FieldValuePair[];
|
||||
// All remaining field/value pairs of the group.
repeatedValues: FieldValuePair[];
|
||||
histogram: ChangePoint['histogram'];
|
||||
}
|
|
@ -14,7 +14,7 @@ import type { ChangePoint } from '@kbn/ml-agg-utils';
|
|||
import type { Query } from '@kbn/es-query';
|
||||
|
||||
import { buildBaseFilterCriteria } from './application/utils/query_utils';
|
||||
import { GroupTableItem } from './components/spike_analysis_table/spike_analysis_table_groups';
|
||||
import { GroupTableItem } from './components/spike_analysis_table/types';
|
||||
|
||||
export interface DocumentCountStats {
|
||||
interval?: number;
|
||||
|
|
|
@ -27,7 +27,7 @@ import {
|
|||
import { useTimefilter } from './use_time_filter';
|
||||
import { useDocumentCountStats } from './use_document_count_stats';
|
||||
import type { Dictionary } from './use_url_state';
|
||||
import type { GroupTableItem } from '../components/spike_analysis_table/spike_analysis_table_groups';
|
||||
import type { GroupTableItem } from '../components/spike_analysis_table/types';
|
||||
|
||||
const DEFAULT_BAR_TARGET = 75;
|
||||
|
||||
|
|
|
@ -6,7 +6,6 @@
|
|||
*/
|
||||
|
||||
import { queue } from 'async';
|
||||
import { uniqWith, isEqual } from 'lodash';
|
||||
|
||||
import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
|
||||
|
||||
|
@ -23,7 +22,6 @@ import type {
|
|||
NumericHistogramField,
|
||||
} from '@kbn/ml-agg-utils';
|
||||
import { fetchHistogramsForFields } from '@kbn/ml-agg-utils';
|
||||
import { stringHash } from '@kbn/ml-string-hash';
|
||||
|
||||
import {
|
||||
addChangePointsAction,
|
||||
|
@ -43,22 +41,13 @@ import { API_ENDPOINT } from '../../common/api';
|
|||
import { isRequestAbortedError } from '../lib/is_request_aborted_error';
|
||||
import type { AiopsLicense } from '../types';
|
||||
|
||||
import { duplicateIdentifier } from './queries/duplicate_identifier';
|
||||
import { fetchChangePointPValues } from './queries/fetch_change_point_p_values';
|
||||
import { fetchIndexInfo } from './queries/fetch_index_info';
|
||||
import {
|
||||
dropDuplicates,
|
||||
fetchFrequentItems,
|
||||
groupDuplicates,
|
||||
} from './queries/fetch_frequent_items';
|
||||
import type { ItemsetResult } from './queries/fetch_frequent_items';
|
||||
import { dropDuplicates, fetchFrequentItems } from './queries/fetch_frequent_items';
|
||||
import { getHistogramQuery } from './queries/get_histogram_query';
|
||||
import {
|
||||
getFieldValuePairCounts,
|
||||
getSimpleHierarchicalTree,
|
||||
getSimpleHierarchicalTreeLeaves,
|
||||
markDuplicates,
|
||||
} from './queries/get_simple_hierarchical_tree';
|
||||
import { getGroupFilter } from './queries/get_group_filter';
|
||||
import { getChangePointGroups } from './queries/get_change_point_groups';
|
||||
|
||||
// 10s ping frequency to keep the stream alive.
|
||||
const PING_FREQUENCY = 10000;
|
||||
|
@ -434,25 +423,9 @@ export const defineExplainLogRateSpikesRoute = (
|
|||
})
|
||||
);
|
||||
|
||||
// To optimize the `frequent_items` query, we identify duplicate change points by count attributes.
|
||||
// Note this is a compromise and not 100% accurate because there could be change points that
|
||||
// have the exact same counts but still don't co-occur.
|
||||
const duplicateIdentifier: Array<keyof ChangePoint> = [
|
||||
'doc_count',
|
||||
'bg_count',
|
||||
'total_doc_count',
|
||||
'total_bg_count',
|
||||
];
|
||||
|
||||
// These are the deduplicated change points we pass to the `frequent_items` aggregation.
|
||||
// Deduplicated change points we pass to the `frequent_items` aggregation.
|
||||
const deduplicatedChangePoints = dropDuplicates(changePoints, duplicateIdentifier);
|
||||
|
||||
// We use the grouped change points to later repopulate
|
||||
// the `frequent_items` result with the missing duplicates.
|
||||
const groupedChangePoints = groupDuplicates(changePoints, duplicateIdentifier).filter(
|
||||
(g) => g.group.length > 1
|
||||
);
|
||||
|
||||
try {
|
||||
const { fields, df } = await fetchFrequentItems(
|
||||
client,
|
||||
|
@ -475,143 +448,9 @@ export const defineExplainLogRateSpikesRoute = (
|
|||
}
|
||||
|
||||
if (fields.length > 0 && df.length > 0) {
|
||||
// The way the `frequent_items` aggregations works could return item sets that include
|
||||
// field/value pairs that are not part of the original list of significant change points.
|
||||
// This cleans up groups and removes those unrelated field/value pairs.
|
||||
const filteredDf = df
|
||||
.map((fi, fiIndex) => {
|
||||
const updatedSet = Object.entries(fi.set).reduce<ItemsetResult['set']>(
|
||||
(set, [field, value]) => {
|
||||
if (
|
||||
changePoints.some(
|
||||
(cp) => cp.fieldName === field && cp.fieldValue === value
|
||||
)
|
||||
) {
|
||||
set[field] = value;
|
||||
}
|
||||
return set;
|
||||
},
|
||||
{}
|
||||
);
|
||||
const changePointGroups = getChangePointGroups(df, changePoints, fields);
|
||||
|
||||
// only assign the updated reduced set if it doesn't already match
|
||||
// an existing set. if there's a match just add an empty set
|
||||
// so it will be filtered in the last step.
|
||||
fi.set = df.some((d, dIndex) => fiIndex !== dIndex && isEqual(fi.set, d.set))
|
||||
? {}
|
||||
: updatedSet;
|
||||
|
||||
fi.size = Object.keys(fi.set).length;
|
||||
|
||||
return fi;
|
||||
})
|
||||
.filter((fi) => fi.size > 1);
|
||||
|
||||
// `frequent_items` returns lot of different small groups of field/value pairs that co-occur.
|
||||
// The following steps analyse these small groups, identify overlap between these groups,
|
||||
// and then summarize them in larger groups where possible.
|
||||
|
||||
// Get a tree structure based on `frequent_items`.
|
||||
const { root } = getSimpleHierarchicalTree(filteredDf, true, false, fields);
|
||||
|
||||
// Each leave of the tree will be a summarized group of co-occuring field/value pairs.
|
||||
const treeLeaves = getSimpleHierarchicalTreeLeaves(root, []);
|
||||
|
||||
// To be able to display a more cleaned up results table in the UI, we identify field/value pairs
|
||||
// that occur in multiple groups. This will allow us to highlight field/value pairs that are
|
||||
// unique to a group in a better way. This step will also re-add duplicates we identified in the
|
||||
// beginning and didn't pass on to the `frequent_items` agg.
|
||||
const fieldValuePairCounts = getFieldValuePairCounts(treeLeaves);
|
||||
const changePointGroups = markDuplicates(treeLeaves, fieldValuePairCounts).map(
|
||||
(g) => {
|
||||
const group = [...g.group];
|
||||
|
||||
for (const groupItem of g.group) {
|
||||
const { duplicate } = groupItem;
|
||||
const duplicates = groupedChangePoints.find((d) =>
|
||||
d.group.some(
|
||||
(dg) =>
|
||||
dg.fieldName === groupItem.fieldName &&
|
||||
dg.fieldValue === groupItem.fieldValue
|
||||
)
|
||||
);
|
||||
|
||||
if (duplicates !== undefined) {
|
||||
group.push(
|
||||
...duplicates.group.map((d) => {
|
||||
return {
|
||||
fieldName: d.fieldName,
|
||||
fieldValue: d.fieldValue,
|
||||
duplicate,
|
||||
};
|
||||
})
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
...g,
|
||||
group: uniqWith(group, (a, b) => isEqual(a, b)),
|
||||
};
|
||||
}
|
||||
);
|
||||
|
||||
// Some field/value pairs might not be part of the `frequent_items` result set, for example
|
||||
// because they don't co-occur with other field/value pairs or because of the limits we set on the query.
|
||||
// In this next part we identify those missing pairs and add them as individual groups.
|
||||
const missingChangePoints = deduplicatedChangePoints.filter((cp) => {
|
||||
return !changePointGroups.some((cpg) => {
|
||||
return cpg.group.some(
|
||||
(d) => d.fieldName === cp.fieldName && d.fieldValue === cp.fieldValue
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
changePointGroups.push(
|
||||
...missingChangePoints.map(
|
||||
({ fieldName, fieldValue, doc_count: docCount, pValue }) => {
|
||||
const duplicates = groupedChangePoints.find((d) =>
|
||||
d.group.some(
|
||||
(dg) => dg.fieldName === fieldName && dg.fieldValue === fieldValue
|
||||
)
|
||||
);
|
||||
if (duplicates !== undefined) {
|
||||
return {
|
||||
id: `${stringHash(
|
||||
JSON.stringify(
|
||||
duplicates.group.map((d) => ({
|
||||
fieldName: d.fieldName,
|
||||
fieldValue: d.fieldValue,
|
||||
}))
|
||||
)
|
||||
)}`,
|
||||
group: duplicates.group.map((d) => ({
|
||||
fieldName: d.fieldName,
|
||||
fieldValue: d.fieldValue,
|
||||
duplicate: false,
|
||||
})),
|
||||
docCount,
|
||||
pValue,
|
||||
};
|
||||
} else {
|
||||
return {
|
||||
id: `${stringHash(JSON.stringify({ fieldName, fieldValue }))}`,
|
||||
group: [
|
||||
{
|
||||
fieldName,
|
||||
fieldValue,
|
||||
duplicate: false,
|
||||
},
|
||||
],
|
||||
docCount,
|
||||
pValue,
|
||||
};
|
||||
}
|
||||
}
|
||||
)
|
||||
);
|
||||
|
||||
// Finally, we'll find out if there's at least one group with at least two items,
|
||||
// We'll find out if there's at least one group with at least two items,
|
||||
// only then will we return the groups to the clients and make the grouping option available.
|
||||
const maxItems = Math.max(...changePointGroups.map((g) => g.group.length));
|
||||
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import type { ChangePoint } from '@kbn/ml-agg-utils';
|
||||
|
||||
// To optimize the `frequent_items` query, we identify duplicate change points by count attributes.
|
||||
// Note this is a compromise and not 100% accurate because there could be change points that
|
||||
// have the exact same counts but still don't co-occur.
|
||||
// Passed to `dropDuplicates` and `groupDuplicates` as the list of `ChangePoint`
// attributes that are compared to decide whether two change points are duplicates.
export const duplicateIdentifier: Array<keyof ChangePoint> = [
|
||||
'doc_count',
|
||||
'bg_count',
|
||||
'total_doc_count',
|
||||
'total_bg_count',
|
||||
];
|
|
@ -11,9 +11,11 @@ import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
|
|||
|
||||
import type { ElasticsearchClient } from '@kbn/core-elasticsearch-server';
|
||||
import type { Logger } from '@kbn/logging';
|
||||
import { type ChangePoint, type FieldValuePair, RANDOM_SAMPLER_SEED } from '@kbn/ml-agg-utils';
|
||||
import { type ChangePoint, RANDOM_SAMPLER_SEED } from '@kbn/ml-agg-utils';
|
||||
import { isPopulatedObject } from '@kbn/ml-is-populated-object';
|
||||
|
||||
import type { ChangePointDuplicateGroup, ItemsetResult } from '../../../common/types';
|
||||
|
||||
const FREQUENT_ITEMS_FIELDS_LIMIT = 15;
|
||||
|
||||
interface FrequentItemsAggregation extends estypes.AggregationsSamplerAggregation {
|
||||
|
@ -34,10 +36,6 @@ export function dropDuplicates(cps: ChangePoint[], uniqueFields: Array<keyof Cha
|
|||
return uniqWith(cps, (a, b) => isEqual(pick(a, uniqueFields), pick(b, uniqueFields)));
|
||||
}
|
||||
|
||||
interface ChangePointDuplicateGroup {
|
||||
keys: Pick<ChangePoint, keyof ChangePoint>;
|
||||
group: ChangePoint[];
|
||||
}
|
||||
export function groupDuplicates(cps: ChangePoint[], uniqueFields: Array<keyof ChangePoint>) {
|
||||
const groups: ChangePointDuplicateGroup[] = [];
|
||||
|
||||
|
@ -226,12 +224,3 @@ export async function fetchFrequentItems(
|
|||
totalDocCount: totalDocCountFi,
|
||||
};
|
||||
}
|
||||
|
||||
export interface ItemsetResult {
|
||||
set: Record<FieldValuePair['fieldName'], FieldValuePair['fieldValue']>;
|
||||
size: number;
|
||||
maxPValue: number;
|
||||
doc_count: number;
|
||||
support: number;
|
||||
total_doc_count: number;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import { fields } from '../../../common/__mocks__/artificial_logs/fields';
|
||||
import { frequentItems } from '../../../common/__mocks__/artificial_logs/frequent_items';
|
||||
import { changePoints } from '../../../common/__mocks__/artificial_logs/change_points';
|
||||
import { finalChangePointGroups } from '../../../common/__mocks__/artificial_logs/final_change_point_groups';
|
||||
|
||||
import { getChangePointGroups } from './get_change_point_groups';
|
||||
|
||||
// Unit test for `getChangePointGroups`, driven entirely by the static
// `artificial_logs` mocks imported at the top of the file.
describe('getChangePointGroups', () => {
|
||||
it('gets change point groups', () => {
|
||||
const changePointGroups = getChangePointGroups(frequentItems, changePoints, fields);
|
||||
|
||||
// The result must match the `finalChangePointGroups` mock exactly.
expect(changePointGroups).toEqual(finalChangePointGroups);
|
||||
});
|
||||
});
|
|
@ -0,0 +1,74 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import type { ChangePoint, ChangePointGroup } from '@kbn/ml-agg-utils';
|
||||
|
||||
import { duplicateIdentifier } from './duplicate_identifier';
|
||||
import { dropDuplicates, groupDuplicates } from './fetch_frequent_items';
|
||||
import { getFieldValuePairCounts } from './get_field_value_pair_counts';
|
||||
import { getMarkedDuplicates } from './get_marked_duplicates';
|
||||
import { getSimpleHierarchicalTree } from './get_simple_hierarchical_tree';
|
||||
import { getSimpleHierarchicalTreeLeaves } from './get_simple_hierarchical_tree_leaves';
|
||||
import { getFilteredFrequentItems } from './get_filtered_frequent_items';
|
||||
import { getGroupsWithReaddedDuplicates } from './get_groups_with_readded_duplicates';
|
||||
import { getMissingChangePoints } from './get_missing_change_points';
|
||||
import { transformChangePointToGroup } from './transform_change_point_to_group';
|
||||
import type { ItemsetResult } from '../../../common/types';
|
||||
|
||||
/**
 * Derives change point groups from the `frequent_items` item sets and the
 * list of significant change points.
 *
 * @param itemsets Item sets of the `frequent_items` aggregation result.
 * @param changePoints Significant change points, may contain duplicates.
 * @param fields Field names handed over to `getSimpleHierarchicalTree`.
 * @returns The groups derived from the item sets, followed by one group for
 * each deduplicated change point that is not part of any of those groups.
 */
export function getChangePointGroups(
  itemsets: ItemsetResult[],
  changePoints: ChangePoint[],
  fields: string[]
): ChangePointGroup[] {
  // The deduplicated change points are the ones passed to the `frequent_items` aggregation.
  const uniqueChangePoints = dropDuplicates(changePoints, duplicateIdentifier);

  // Duplicate groups with more than one member are used further down
  // to repopulate the `frequent_items` result with the missing duplicates.
  const duplicateGroups = groupDuplicates(changePoints, duplicateIdentifier).filter(
    ({ group }) => group.length > 1
  );

  // `frequent_items` returns a lot of different small groups of field/value pairs that co-occur.
  // The tree built from the filtered item sets is used to identify overlap between these
  // small groups and to summarize them in larger groups where possible.
  const tree = getSimpleHierarchicalTree(
    getFilteredFrequentItems(itemsets, changePoints),
    true,
    false,
    fields
  );

  // Each leaf of the tree is a summarized group of co-occurring field/value pairs.
  const leaves = getSimpleHierarchicalTreeLeaves(tree.root, []);

  // To be able to display a more cleaned up results table in the UI, field/value pairs that
  // occur in multiple groups get marked so pairs unique to a group can be highlighted.
  // Afterwards the duplicates that were not passed on to the `frequent_items` agg are re-added.
  const groups = getGroupsWithReaddedDuplicates(
    getMarkedDuplicates(leaves, getFieldValuePairCounts(leaves)),
    duplicateGroups
  );

  // Some field/value pairs might not be part of the `frequent_items` result set, for example
  // because they don't co-occur with other field/value pairs or because of the limits set on
  // the query. Those missing pairs are appended as individual groups.
  const individualGroups = getMissingChangePoints(uniqueChangePoints, groups).map((changePoint) =>
    transformChangePointToGroup(changePoint, duplicateGroups)
  );

  return [...groups, ...individualGroups];
}
|
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import { changePointGroups } from '../../../common/__mocks__/farequote/change_point_groups';
|
||||
import { fields } from '../../../common/__mocks__/artificial_logs/fields';
|
||||
import { filteredFrequentItems } from '../../../common/__mocks__/artificial_logs/filtered_frequent_items';
|
||||
|
||||
import { getFieldValuePairCounts } from './get_field_value_pair_counts';
|
||||
import { getSimpleHierarchicalTree } from './get_simple_hierarchical_tree';
|
||||
import { getSimpleHierarchicalTreeLeaves } from './get_simple_hierarchical_tree_leaves';
|
||||
|
||||
describe('getFieldValuePairCounts', () => {
  it('returns a nested record with field/value pair counts for farequote', () => {
    // Expected counts: `deviation` is counted twice, each airline once.
    const expectedCounts = {
      airline: { AAL: 1, UAL: 1 },
      'custom_field.keyword': { deviation: 2 },
    };

    expect(getFieldValuePairCounts(changePointGroups)).toEqual(expectedCounts);
  });

  it('returns a nested record with field/value pair counts for artificial logs', () => {
    // The groups to count are the leaves of the tree built from the filtered item sets.
    const { root } = getSimpleHierarchicalTree(filteredFrequentItems, true, false, fields);
    const treeLeaves = getSimpleHierarchicalTreeLeaves(root, []);

    expect(getFieldValuePairCounts(treeLeaves)).toEqual({
      response_code: { '500': 1 },
      url: { 'home.php': 1 },
    });
  });
});
|
|
@ -0,0 +1,25 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import type { ChangePointGroup } from '@kbn/ml-agg-utils';
|
||||
|
||||
import type { FieldValuePairCounts } from '../../../common/types';
|
||||
|
||||
/**
 * Get a nested record of field/value pairs with counts.
 *
 * The outer keys are field names, the inner keys are field values and each
 * number states how many group entries carry that field/value pair.
 *
 * @param cpgs Change point groups whose `group` entries get counted.
 * @returns Counts keyed by field name first and by field value second.
 */
export function getFieldValuePairCounts(cpgs: ChangePointGroup[]): FieldValuePairCounts {
  // Only own properties may count as "already seen". Plain objects inherit members
  // like `constructor` or `toString`, so a truthiness or `undefined` check would
  // mistake those for existing entries when a field name or value has such a name.
  const hasOwn = (obj: object, key: PropertyKey) =>
    Object.prototype.hasOwnProperty.call(obj, key);

  return cpgs.reduce<FieldValuePairCounts>((counts, { group }) => {
    for (const { fieldName, fieldValue } of group) {
      if (!hasOwn(counts, fieldName)) {
        counts[fieldName] = {};
      }

      const valueCounts = counts[fieldName];
      valueCounts[fieldValue] = hasOwn(valueCounts, fieldValue) ? valueCounts[fieldValue] + 1 : 1;
    }

    return counts;
  }, {});
}
|
|
@ -0,0 +1,20 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import { changePoints } from '../../../common/__mocks__/artificial_logs/change_points';
|
||||
import { frequentItems } from '../../../common/__mocks__/artificial_logs/frequent_items';
|
||||
import { filteredFrequentItems } from '../../../common/__mocks__/artificial_logs/filtered_frequent_items';
|
||||
|
||||
import { getFilteredFrequentItems } from './get_filtered_frequent_items';
|
||||
|
||||
// Compares the filtered `artificial_logs` item sets against their static mock.
describe('getFilteredFrequentItems', () => {
  it('filter frequent item set based on provided change points', () => {
    const filteredItemsets = getFilteredFrequentItems(frequentItems, changePoints);

    expect(filteredItemsets).toStrictEqual(filteredFrequentItems);
  });
});
|
|
@ -0,0 +1,47 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import { isEqual } from 'lodash';
|
||||
|
||||
import type { ChangePoint } from '@kbn/ml-agg-utils';
|
||||
|
||||
import type { ItemsetResult } from '../../../common/types';
|
||||
|
||||
/**
 * The way the `frequent_items` aggregation works could return item sets that include
 * field/value pairs that are not part of the original list of significant change points.
 * This cleans up groups and removes those unrelated field/value pairs.
 *
 * The provided `itemsets` are left untouched, the result consists of copies.
 * When several item sets end up with the same field/value pairs only one of them is
 * returned: the first one that needed no clean up, or otherwise the first one in the list.
 *
 * @param itemsets Item sets returned by the `frequent_items` aggregation.
 * @param changePoints Significant change points, the allow list for field/value pairs.
 * @returns The cleaned up item sets with `size` matching the remaining pairs.
 */
export function getFilteredFrequentItems(
  itemsets: ItemsetResult[],
  changePoints: ChangePoint[]
): ItemsetResult[] {
  // Remove field/value pairs not part of the provided change points.
  // New `set` objects get created so the caller's item sets are not mutated.
  const reducedSets = itemsets.map((itemset) =>
    Object.entries(itemset.set).reduce<ItemsetResult['set']>((set, [field, value]) => {
      if (changePoints.some((cp) => cp.fieldName === field && cp.fieldValue === value)) {
        set[field] = value;
      }
      return set;
    }, {})
  );

  // An item set is "exact" if the clean up didn't remove any of its field/value pairs.
  const isExact = (index: number) =>
    Object.keys(reducedSets[index]).length === Object.keys(itemsets[index].set).length;

  return itemsets.reduce<ItemsetResult[]>((filtered, itemset, index) => {
    const set = reducedSets[index];

    // Indices of all item sets that share this reduced set, in their original order.
    const matches = reducedSets.reduce<number[]>((indices, otherSet, otherIndex) => {
      if (otherIndex === index || isEqual(set, otherSet)) {
        indices.push(otherIndex);
      }
      return indices;
    }, []);

    // Keep exactly one representative per reduced set. Comparing against the
    // immutable `reducedSets` avoids losing a set completely, which could happen
    // when the comparison depends on how far the input has already been rewritten.
    const representative = matches.find(isExact) ?? matches[0];
    if (representative !== index) {
      return filtered;
    }

    // Update the size attribute to match the possibly updated set.
    filtered.push({ ...itemset, set, size: Object.keys(set).length });

    return filtered;
  }, []);
}
|
|
@ -5,30 +5,13 @@
|
|||
* 2.0.
|
||||
*/
|
||||
|
||||
import { getGroupFilter } from './get_group_filter';
|
||||
import { finalChangePointGroups } from '../../../common/__mocks__/artificial_logs/final_change_point_groups';
|
||||
|
||||
const changePointGroups = [
|
||||
{
|
||||
id: '2038579476',
|
||||
group: [
|
||||
{ fieldName: 'response_code', fieldValue: '500', duplicate: false },
|
||||
{ fieldName: 'url', fieldValue: 'home.php', duplicate: false },
|
||||
{ fieldName: 'url', fieldValue: 'login.php', duplicate: false },
|
||||
],
|
||||
docCount: 792,
|
||||
pValue: 0.010770456205312423,
|
||||
},
|
||||
{
|
||||
id: '817080373',
|
||||
group: [{ fieldName: 'user', fieldValue: 'Peter', duplicate: false }],
|
||||
docCount: 1981,
|
||||
pValue: 2.7454255728359757e-21,
|
||||
},
|
||||
];
|
||||
import { getGroupFilter } from './get_group_filter';
|
||||
|
||||
describe('getGroupFilter', () => {
|
||||
it('gets a query filter for the change points of a group with multiple values per field', () => {
|
||||
expect(getGroupFilter(changePointGroups[0])).toStrictEqual([
|
||||
expect(getGroupFilter(finalChangePointGroups[0])).toStrictEqual([
|
||||
{
|
||||
term: {
|
||||
response_code: '500',
|
||||
|
@ -43,7 +26,7 @@ describe('getGroupFilter', () => {
|
|||
});
|
||||
|
||||
it('gets a query filter for the change points of a group with just a single field/value', () => {
|
||||
expect(getGroupFilter(changePointGroups[1])).toStrictEqual([
|
||||
expect(getGroupFilter(finalChangePointGroups[1])).toStrictEqual([
|
||||
{
|
||||
term: {
|
||||
user: 'Peter',
|
||||
|
|
|
@ -0,0 +1,55 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import { changePointGroups } from '../../../common/__mocks__/artificial_logs/change_point_groups';
|
||||
import { changePoints } from '../../../common/__mocks__/artificial_logs/change_points';
|
||||
|
||||
import { duplicateIdentifier } from './duplicate_identifier';
|
||||
import { getGroupsWithReaddedDuplicates } from './get_groups_with_readded_duplicates';
|
||||
import { groupDuplicates } from './fetch_frequent_items';
|
||||
import { getFieldValuePairCounts } from './get_field_value_pair_counts';
|
||||
import { getMarkedDuplicates } from './get_marked_duplicates';
|
||||
|
||||
describe('getGroupsWithReaddedDuplicates', () => {
  it('gets groups with readded duplicates', () => {
    // Only duplicate groups with more than one change point are relevant.
    const duplicateGroups = groupDuplicates(changePoints, duplicateIdentifier).filter(
      ({ group }) => group.length > 1
    );

    const markedGroups = getMarkedDuplicates(
      changePointGroups,
      getFieldValuePairCounts(changePointGroups)
    );

    // Compared to the mocked input group, `url: login.php` is expected as an additional entry.
    expect(getGroupsWithReaddedDuplicates(markedGroups, duplicateGroups)).toEqual([
      {
        id: '2038579476',
        group: [
          { fieldName: 'response_code', fieldValue: '500', duplicate: false },
          { fieldName: 'url', fieldValue: 'home.php', duplicate: false },
          { fieldName: 'url', fieldValue: 'login.php', duplicate: false },
        ],
        docCount: 792,
        pValue: 0.010770456205312423,
      },
    ]);
  });
});
|
|
@ -0,0 +1,47 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import { uniqWith, isEqual } from 'lodash';
|
||||
|
||||
import type { ChangePointGroup } from '@kbn/ml-agg-utils';
|
||||
|
||||
import type { ChangePointDuplicateGroup } from '../../../common/types';
|
||||
|
||||
/**
 * Extends each group with the members of the duplicate groups its entries belong to.
 *
 * For every entry of a group the first duplicate group containing the entry's
 * field/value pair is looked up. All members of that duplicate group are appended
 * to the group, carrying over the `duplicate` attribute of the entry that matched.
 * Entries that are deeply equal are then reduced to a single one.
 *
 * @param groups Change point groups to extend.
 * @param groupedChangePoints Groups of change points identified as duplicates of each other.
 * @returns Copies of the groups with the extended `group` lists.
 */
export function getGroupsWithReaddedDuplicates(
  groups: ChangePointGroup[],
  groupedChangePoints: ChangePointDuplicateGroup[]
): ChangePointGroup[] {
  return groups.map((g) => {
    const readded = g.group.flatMap(({ fieldName, fieldValue, duplicate }) => {
      const match = groupedChangePoints.find((d) =>
        d.group.some((dg) => dg.fieldName === fieldName && dg.fieldValue === fieldValue)
      );

      // No duplicate group for this entry means there's nothing to append.
      return match === undefined
        ? []
        : match.group.map((d) => ({
            fieldName: d.fieldName,
            fieldValue: d.fieldValue,
            duplicate,
          }));
    });

    return {
      ...g,
      // Original entries come first so they win over equal re-added ones.
      group: uniqWith([...g.group, ...readded], (a, b) => isEqual(a, b)),
    };
  });
}
|
|
@ -0,0 +1,91 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import { changePointGroups } from '../../../common/__mocks__/farequote/change_point_groups';
|
||||
import { fields } from '../../../common/__mocks__/artificial_logs/fields';
|
||||
import { filteredFrequentItems } from '../../../common/__mocks__/artificial_logs/filtered_frequent_items';
|
||||
|
||||
import { getFieldValuePairCounts } from './get_field_value_pair_counts';
|
||||
import { getMarkedDuplicates } from './get_marked_duplicates';
|
||||
import { getSimpleHierarchicalTree } from './get_simple_hierarchical_tree';
|
||||
import { getSimpleHierarchicalTreeLeaves } from './get_simple_hierarchical_tree_leaves';
|
||||
|
||||
// The suite is named after the function under test, `getMarkedDuplicates`.
describe('getMarkedDuplicates', () => {
  it('marks duplicates based on change point groups for farequote', () => {
    const fieldValuePairCounts = getFieldValuePairCounts(changePointGroups);
    const markedDuplicates = getMarkedDuplicates(changePointGroups, fieldValuePairCounts);

    // `custom_field.keyword: deviation` is expected to be flagged in both groups,
    // the airlines are expected to stay unflagged.
    expect(markedDuplicates).toEqual([
      {
        id: 'group-1',
        group: [
          { fieldName: 'custom_field.keyword', fieldValue: 'deviation', duplicate: true },
          { fieldName: 'airline', fieldValue: 'UAL', duplicate: false },
        ],
        docCount: 101,
        pValue: 0.01,
      },
      {
        id: 'group-2',
        group: [
          { fieldName: 'custom_field.keyword', fieldValue: 'deviation', duplicate: true },
          { fieldName: 'airline', fieldValue: 'AAL', duplicate: false },
        ],
        docCount: 49,
        pValue: 0.001,
      },
    ]);
  });

  it('marks duplicates based on change point groups for artificial logs', () => {
    // The groups to mark are the leaves of the tree built from the filtered item sets.
    const simpleHierarchicalTree = getSimpleHierarchicalTree(
      filteredFrequentItems,
      true,
      false,
      fields
    );
    const leaves = getSimpleHierarchicalTreeLeaves(simpleHierarchicalTree.root, []);
    const fieldValuePairCounts = getFieldValuePairCounts(leaves);
    const markedDuplicates = getMarkedDuplicates(leaves, fieldValuePairCounts);

    expect(markedDuplicates).toEqual([
      {
        id: '2038579476',
        group: [
          { fieldName: 'response_code', fieldValue: '500', duplicate: false },
          { fieldName: 'url', fieldValue: 'home.php', duplicate: false },
        ],
        docCount: 792,
        pValue: 0.010770456205312423,
      },
    ]);
  });
});
|
|
@ -0,0 +1,30 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import type { ChangePointGroup } from '@kbn/ml-agg-utils';
|
||||
|
||||
import type { FieldValuePairCounts } from '../../../common/types';
|
||||
|
||||
/**
 * Analyse duplicate field/value pairs in change point groups.
 *
 * Every entry of every group gets a `duplicate` attribute. It is `true` if the
 * count stored for the entry's field/value pair in `fieldValuePairCounts` is larger than 1.
 *
 * @param cpgs Change point groups to analyse.
 * @param fieldValuePairCounts Counts keyed by field name first and by field value second.
 * @returns Copies of the groups with the `duplicate` attribute set on each entry.
 */
export function getMarkedDuplicates(
  cpgs: ChangePointGroup[],
  fieldValuePairCounts: FieldValuePairCounts
): ChangePointGroup[] {
  return cpgs.map((cpg) => {
    const group = cpg.group.map((item) => {
      const count = fieldValuePairCounts[item.fieldName][item.fieldValue];
      return { ...item, duplicate: count > 1 };
    });

    return { ...cpg, group };
  });
}
|
|
@ -0,0 +1,52 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import { changePointGroups } from '../../../common/__mocks__/artificial_logs/change_point_groups';
|
||||
import { changePoints } from '../../../common/__mocks__/artificial_logs/change_points';
|
||||
|
||||
import { duplicateIdentifier } from './duplicate_identifier';
|
||||
import { getGroupsWithReaddedDuplicates } from './get_groups_with_readded_duplicates';
|
||||
import { dropDuplicates, groupDuplicates } from './fetch_frequent_items';
|
||||
import { getFieldValuePairCounts } from './get_field_value_pair_counts';
|
||||
import { getMarkedDuplicates } from './get_marked_duplicates';
|
||||
import { getMissingChangePoints } from './get_missing_change_points';
|
||||
|
||||
describe('getMissingChangePoints', () => {
  it('get missing change points', () => {
    const uniqueChangePoints = dropDuplicates(changePoints, duplicateIdentifier);

    // Only duplicate groups with more than one change point are relevant.
    const duplicateGroups = groupDuplicates(changePoints, duplicateIdentifier).filter(
      ({ group }) => group.length > 1
    );

    const groups = getGroupsWithReaddedDuplicates(
      getMarkedDuplicates(changePointGroups, getFieldValuePairCounts(changePointGroups)),
      duplicateGroups
    );

    // `user: Peter` is expected to be the only change point not covered by a group.
    expect(getMissingChangePoints(uniqueChangePoints, groups)).toEqual([
      {
        fieldName: 'user',
        fieldValue: 'Peter',
        doc_count: 1981,
        bg_count: 553,
        total_doc_count: 4671,
        total_bg_count: 1975,
        score: 47.34435085428873,
        pValue: 2.7454255728359757e-21,
        normalizedScore: 0.8327337555873047,
      },
    ]);
  });
});
|
|
@ -0,0 +1,19 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import type { ChangePoint, ChangePointGroup } from '@kbn/ml-agg-utils';
|
||||
|
||||
/**
 * Returns the change points whose field/value pair is not part of any group.
 *
 * @param deduplicatedChangePoints Change points to check.
 * @param changePointGroups Groups whose `group` entries count as covered field/value pairs.
 * @returns The change points not covered by any group, in their original order.
 */
export function getMissingChangePoints(
  deduplicatedChangePoints: ChangePoint[],
  changePointGroups: ChangePointGroup[]
) {
  // Flatten once so each change point can be checked against a single list.
  const coveredPairs = changePointGroups.flatMap((cpg) => cpg.group);

  return deduplicatedChangePoints.filter(({ fieldName, fieldValue }) =>
    coveredPairs.every((d) => d.fieldName !== fieldName || d.fieldValue !== fieldValue)
  );
}
|
|
@ -5,101 +5,50 @@
|
|||
* 2.0.
|
||||
*/
|
||||
|
||||
import type { ChangePointGroup } from '@kbn/ml-agg-utils';
|
||||
import { fields } from '../../../common/__mocks__/artificial_logs/fields';
|
||||
import { filteredFrequentItems } from '../../../common/__mocks__/artificial_logs/filtered_frequent_items';
|
||||
|
||||
import { getFieldValuePairCounts, markDuplicates } from './get_simple_hierarchical_tree';
|
||||
import { getSimpleHierarchicalTree } from './get_simple_hierarchical_tree';
|
||||
|
||||
const changePointGroups: ChangePointGroup[] = [
|
||||
{
|
||||
id: 'group-1',
|
||||
group: [
|
||||
{
|
||||
fieldName: 'custom_field.keyword',
|
||||
fieldValue: 'deviation',
|
||||
describe('getSimpleHierarchicalTree', () => {
|
||||
it('returns the hierarchical tree', () => {
|
||||
// stringify and again parse the tree to remove attached methods
|
||||
// and make it comparable against a static representation.
|
||||
expect(
|
||||
JSON.parse(
|
||||
JSON.stringify(getSimpleHierarchicalTree(filteredFrequentItems, true, false, fields))
|
||||
)
|
||||
).toEqual({
|
||||
root: {
|
||||
name: '',
|
||||
set: [],
|
||||
docCount: 0,
|
||||
pValue: 0,
|
||||
children: [
|
||||
{
|
||||
name: "792/1505 500 home.php '*'",
|
||||
set: [
|
||||
{ fieldName: 'response_code', fieldValue: '500' },
|
||||
{ fieldName: 'url', fieldValue: 'home.php' },
|
||||
],
|
||||
docCount: 792,
|
||||
pValue: 0.010770456205312423,
|
||||
children: [
|
||||
{
|
||||
name: "792/1505 500 home.php '*'",
|
||||
set: [
|
||||
{ fieldName: 'response_code', fieldValue: '500' },
|
||||
{ fieldName: 'url', fieldValue: 'home.php' },
|
||||
],
|
||||
docCount: 792,
|
||||
pValue: 0.010770456205312423,
|
||||
children: [],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
fieldName: 'airline',
|
||||
fieldValue: 'UAL',
|
||||
},
|
||||
],
|
||||
docCount: 101,
|
||||
pValue: 0.01,
|
||||
},
|
||||
{
|
||||
id: 'group-2',
|
||||
group: [
|
||||
{
|
||||
fieldName: 'custom_field.keyword',
|
||||
fieldValue: 'deviation',
|
||||
},
|
||||
{
|
||||
fieldName: 'airline',
|
||||
fieldValue: 'AAL',
|
||||
},
|
||||
],
|
||||
docCount: 49,
|
||||
pValue: 0.001,
|
||||
},
|
||||
];
|
||||
|
||||
describe('get_simple_hierarchical_tree', () => {
|
||||
describe('getFieldValuePairCounts', () => {
|
||||
it('returns a nested record with field/value pair counts', () => {
|
||||
const fieldValuePairCounts = getFieldValuePairCounts(changePointGroups);
|
||||
|
||||
expect(fieldValuePairCounts).toEqual({
|
||||
airline: {
|
||||
AAL: 1,
|
||||
UAL: 1,
|
||||
},
|
||||
'custom_field.keyword': {
|
||||
deviation: 2,
|
||||
},
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('markDuplicates', () => {
|
||||
it('marks duplicates based on change point groups', () => {
|
||||
const fieldValuePairCounts = getFieldValuePairCounts(changePointGroups);
|
||||
const markedDuplicates = markDuplicates(changePointGroups, fieldValuePairCounts);
|
||||
|
||||
expect(markedDuplicates).toEqual([
|
||||
{
|
||||
id: 'group-1',
|
||||
group: [
|
||||
{
|
||||
fieldName: 'custom_field.keyword',
|
||||
fieldValue: 'deviation',
|
||||
duplicate: true,
|
||||
},
|
||||
{
|
||||
fieldName: 'airline',
|
||||
fieldValue: 'UAL',
|
||||
duplicate: false,
|
||||
},
|
||||
],
|
||||
docCount: 101,
|
||||
pValue: 0.01,
|
||||
},
|
||||
{
|
||||
id: 'group-2',
|
||||
group: [
|
||||
{
|
||||
fieldName: 'custom_field.keyword',
|
||||
fieldValue: 'deviation',
|
||||
duplicate: true,
|
||||
},
|
||||
{
|
||||
fieldName: 'airline',
|
||||
fieldValue: 'AAL',
|
||||
duplicate: false,
|
||||
},
|
||||
],
|
||||
docCount: 49,
|
||||
pValue: 0.001,
|
||||
},
|
||||
]);
|
||||
fields: ['response_code', 'url', 'user'],
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
@ -5,47 +5,15 @@
|
|||
* 2.0.
|
||||
*/
|
||||
|
||||
// import { omit, uniq } from 'lodash';
|
||||
import type { ItemsetResult, SimpleHierarchicalTreeNode } from '../../../common/types';
|
||||
|
||||
import type { ChangePointGroup, FieldValuePair } from '@kbn/ml-agg-utils';
|
||||
import { stringHash } from '@kbn/ml-string-hash';
|
||||
import { getValueCounts } from './get_value_counts';
|
||||
import { getValuesDescending } from './get_values_descending';
|
||||
|
||||
import type { ItemsetResult } from './fetch_frequent_items';
|
||||
function NewNodeFactory(name: string): SimpleHierarchicalTreeNode {
|
||||
const children: SimpleHierarchicalTreeNode[] = [];
|
||||
|
||||
function getValueCounts(df: ItemsetResult[], field: string) {
|
||||
return df.reduce<Record<string, number>>((p, c) => {
|
||||
if (c.set[field] === undefined) {
|
||||
return p;
|
||||
}
|
||||
p[c.set[field]] = p[c.set[field]] ? p[c.set[field]] + 1 : 1;
|
||||
return p;
|
||||
}, {});
|
||||
}
|
||||
|
||||
function getValuesDescending(df: ItemsetResult[], field: string): string[] {
|
||||
const valueCounts = getValueCounts(df, field);
|
||||
const keys = Object.keys(valueCounts);
|
||||
|
||||
return keys.sort((a, b) => {
|
||||
return valueCounts[b] - valueCounts[a];
|
||||
});
|
||||
}
|
||||
|
||||
interface NewNode {
|
||||
name: string;
|
||||
set: FieldValuePair[];
|
||||
docCount: number;
|
||||
pValue: number | null;
|
||||
children: NewNode[];
|
||||
icon: string;
|
||||
iconStyle: string;
|
||||
addNode: (node: NewNode) => void;
|
||||
}
|
||||
|
||||
function NewNodeFactory(name: string): NewNode {
|
||||
const children: NewNode[] = [];
|
||||
|
||||
const addNode = (node: NewNode) => {
|
||||
const addNode = (node: SimpleHierarchicalTreeNode) => {
|
||||
children.push(node);
|
||||
};
|
||||
|
||||
|
@ -55,19 +23,15 @@ function NewNodeFactory(name: string): NewNode {
|
|||
docCount: 0,
|
||||
pValue: 0,
|
||||
children,
|
||||
icon: 'default',
|
||||
iconStyle: 'default',
|
||||
addNode,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple (poorly implemented) function that constructs a tree from an itemset DataFrame sorted by support (count)
|
||||
* Simple function that constructs a tree from an itemset DataFrame sorted by support (count)
|
||||
* The resulting tree components are non-overlapping subsets of the data.
|
||||
* In summary, we start with the most inclusive itemset (highest count), and perform a depth first search in field order.
|
||||
*
|
||||
* TODO - the code style here is hacky and should be re-written
|
||||
*
|
||||
* @param displayParent
|
||||
* @param parentDocCount
|
||||
* @param parentLabel
|
||||
|
@ -80,7 +44,7 @@ function NewNodeFactory(name: string): NewNode {
|
|||
*/
|
||||
function dfDepthFirstSearch(
|
||||
fields: string[],
|
||||
displayParent: NewNode,
|
||||
displayParent: SimpleHierarchicalTreeNode,
|
||||
parentDocCount: number,
|
||||
parentLabel: string,
|
||||
field: string,
|
||||
|
@ -108,7 +72,7 @@ function dfDepthFirstSearch(
|
|||
|
||||
let label = `${parentLabel} ${value}`;
|
||||
|
||||
let displayNode: NewNode;
|
||||
let displayNode: SimpleHierarchicalTreeNode;
|
||||
if (parentDocCount === docCount && collapseRedundant) {
|
||||
// collapse identical paths
|
||||
displayParent.name += ` ${value}`;
|
||||
|
@ -118,7 +82,6 @@ function dfDepthFirstSearch(
|
|||
displayNode = displayParent;
|
||||
} else {
|
||||
displayNode = NewNodeFactory(`${docCount}/${totalDocCount}${label}`);
|
||||
displayNode.iconStyle = 'warning';
|
||||
displayNode.set = [...displayParent.set];
|
||||
displayNode.set.push({ fieldName: field, fieldValue: value });
|
||||
displayNode.docCount = docCount;
|
||||
|
@ -130,8 +93,6 @@ function dfDepthFirstSearch(
|
|||
while (true) {
|
||||
const nextFieldIndex = fields.indexOf(field) + 1;
|
||||
if (nextFieldIndex >= fields.length) {
|
||||
displayNode.icon = 'file';
|
||||
displayNode.iconStyle = 'info';
|
||||
return docCount;
|
||||
}
|
||||
nextField = fields[nextFieldIndex];
|
||||
|
@ -147,7 +108,6 @@ function dfDepthFirstSearch(
|
|||
displayNode.name += ` '*'`;
|
||||
label += ` '*'`;
|
||||
const nextDisplayNode = NewNodeFactory(`${docCount}/${totalDocCount}${label}`);
|
||||
nextDisplayNode.iconStyle = 'warning';
|
||||
nextDisplayNode.set = displayNode.set;
|
||||
nextDisplayNode.docCount = docCount;
|
||||
nextDisplayNode.pValue = pValue;
|
||||
|
@ -194,12 +154,6 @@ export function getSimpleHierarchicalTree(
|
|||
displayOther: boolean,
|
||||
fields: string[] = []
|
||||
) {
|
||||
// const candidates = uniq(
|
||||
// df.flatMap((d) =>
|
||||
// Object.keys(omit(d, ['size', 'maxPValue', 'doc_count', 'support', 'total_doc_count']))
|
||||
// )
|
||||
// );
|
||||
|
||||
const field = fields[0];
|
||||
|
||||
const totalDocCount = Math.max(...df.map((d) => d.total_doc_count));
|
||||
|
@ -222,70 +176,3 @@ export function getSimpleHierarchicalTree(
|
|||
|
||||
return { root: newRoot, fields };
|
||||
}
|
||||
|
||||
/**
|
||||
* Get leaves from hierarchical tree.
|
||||
*/
|
||||
export function getSimpleHierarchicalTreeLeaves(
|
||||
tree: NewNode,
|
||||
leaves: ChangePointGroup[],
|
||||
level = 1
|
||||
) {
|
||||
if (tree.children.length === 0) {
|
||||
leaves.push({
|
||||
id: `${stringHash(JSON.stringify(tree.set))}`,
|
||||
group: tree.set,
|
||||
docCount: tree.docCount,
|
||||
pValue: tree.pValue,
|
||||
});
|
||||
} else {
|
||||
for (const child of tree.children) {
|
||||
const newLeaves = getSimpleHierarchicalTreeLeaves(child, [], level + 1);
|
||||
if (newLeaves.length > 0) {
|
||||
leaves.push(...newLeaves);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (leaves.length === 1 && leaves[0].group.length === 0 && leaves[0].docCount === 0) {
|
||||
return [];
|
||||
}
|
||||
|
||||
return leaves;
|
||||
}
|
||||
|
||||
type FieldValuePairCounts = Record<string, Record<string, number>>;
|
||||
/**
|
||||
* Get a nested record of field/value pairs with counts
|
||||
*/
|
||||
export function getFieldValuePairCounts(cpgs: ChangePointGroup[]): FieldValuePairCounts {
|
||||
return cpgs.reduce<FieldValuePairCounts>((p, { group }) => {
|
||||
group.forEach(({ fieldName, fieldValue }) => {
|
||||
if (p[fieldName] === undefined) {
|
||||
p[fieldName] = {};
|
||||
}
|
||||
p[fieldName][fieldValue] = p[fieldName][fieldValue] ? p[fieldName][fieldValue] + 1 : 1;
|
||||
});
|
||||
return p;
|
||||
}, {});
|
||||
}
|
||||
|
||||
/**
|
||||
* Analyse duplicate field/value pairs in change point groups.
|
||||
*/
|
||||
export function markDuplicates(
|
||||
cpgs: ChangePointGroup[],
|
||||
fieldValuePairCounts: FieldValuePairCounts
|
||||
): ChangePointGroup[] {
|
||||
return cpgs.map((cpg) => {
|
||||
return {
|
||||
...cpg,
|
||||
group: cpg.group.map((g) => {
|
||||
return {
|
||||
...g,
|
||||
duplicate: fieldValuePairCounts[g.fieldName][g.fieldValue] > 1,
|
||||
};
|
||||
}),
|
||||
};
|
||||
});
|
||||
}
|
||||
|
|
|
@ -0,0 +1,35 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import { fields } from '../../../common/__mocks__/artificial_logs/fields';
|
||||
import { filteredFrequentItems } from '../../../common/__mocks__/artificial_logs/filtered_frequent_items';
|
||||
|
||||
import { getSimpleHierarchicalTree } from './get_simple_hierarchical_tree';
|
||||
import { getSimpleHierarchicalTreeLeaves } from './get_simple_hierarchical_tree_leaves';
|
||||
|
||||
describe('getSimpleHierarchicalTreeLeaves', () => {
  it('returns the hierarchical tree leaves', () => {
    // Build the tree from the mocked frequent items, then collect the leaves from its root.
    const { root } = getSimpleHierarchicalTree(filteredFrequentItems, true, false, fields);

    const expectedLeaves = [
      {
        id: '2038579476',
        group: [
          { fieldName: 'response_code', fieldValue: '500' },
          { fieldName: 'url', fieldValue: 'home.php' },
        ],
        docCount: 792,
        pValue: 0.010770456205312423,
      },
    ];

    expect(getSimpleHierarchicalTreeLeaves(root, [])).toEqual(expectedLeaves);
  });
});
|
|
@ -0,0 +1,42 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import type { ChangePointGroup } from '@kbn/ml-agg-utils';
|
||||
import { stringHash } from '@kbn/ml-string-hash';
|
||||
|
||||
import type { SimpleHierarchicalTreeNode } from '../../../common/types';
|
||||
|
||||
/**
 * Collect the leaves of a hierarchical tree as change point groups.
 *
 * A node without children is appended to `leaves` as a group whose id is the
 * string hash of its serialized `set`. For a node with children, the leaves
 * of every child are gathered recursively and appended to `leaves`.
 *
 * @param tree - Node to start from.
 * @param leaves - Accumulator the found leaves are pushed onto (mutated).
 * @param level - Depth of `tree`; incremented for each recursive call.
 * @returns `leaves`, or an empty array when the only leaf found has an empty
 *          group and a doc count of 0.
 */
export function getSimpleHierarchicalTreeLeaves(
  tree: SimpleHierarchicalTreeNode,
  leaves: ChangePointGroup[],
  level = 1
) {
  const isLeaf = tree.children.length === 0;

  if (isLeaf) {
    leaves.push({
      id: `${stringHash(JSON.stringify(tree.set))}`,
      group: tree.set,
      docCount: tree.docCount,
      pValue: tree.pValue,
    });
  } else {
    tree.children.forEach((child) => {
      // Each child collects into a fresh array that is then merged into ours.
      const childLeaves = getSimpleHierarchicalTreeLeaves(child, [], level + 1);
      leaves.push(...childLeaves);
    });
  }

  const hasOnlyEmptyLeaf =
    leaves.length === 1 && leaves[0].group.length === 0 && leaves[0].docCount === 0;

  return hasOnlyEmptyLeaf ? [] : leaves;
}
|
|
@ -0,0 +1,31 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import { frequentItems } from '../../../common/__mocks__/artificial_logs/frequent_items';
|
||||
import { getValueCounts } from './get_value_counts';
|
||||
|
||||
describe('getValueCounts', () => {
  it('get value counts for field response_code', () => {
    const valueCounts = getValueCounts(frequentItems, 'response_code');

    expect(valueCounts).toEqual({ '200': 1, '404': 1, '500': 3 });
  });

  it('get value counts for field url', () => {
    const valueCounts = getValueCounts(frequentItems, 'url');

    expect(valueCounts).toEqual({ 'home.php': 6 });
  });

  it('get value counts for field user', () => {
    const valueCounts = getValueCounts(frequentItems, 'user');

    expect(valueCounts).toEqual({ Mary: 1, Paul: 1, Peter: 3 });
  });
});
|
|
@ -0,0 +1,18 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import type { ItemsetResult } from '../../../common/types';
|
||||
|
||||
/**
 * Count how often each value of `field` occurs across the given item sets.
 *
 * Item sets that have no entry for `field` are skipped. Counting is done in a
 * `Map` so that values colliding with inherited object properties (for
 * example `constructor` or `toString`) are counted like any other value.
 *
 * @param df - Item sets to inspect; only `set[field]` is read from each.
 * @param field - Name of the field whose values are counted.
 * @returns A record mapping each encountered value to its number of occurrences.
 */
export function getValueCounts(df: ItemsetResult[], field: string): Record<string, number> {
  const counts = new Map<string, number>();

  for (const { set } of df) {
    const value = set[field];

    if (value !== undefined) {
      // Object keys are always strings, so normalise numeric values the same way.
      const key = String(value);
      counts.set(key, (counts.get(key) ?? 0) + 1);
    }
  }

  // `Object.fromEntries` defines own properties, even for keys like `__proto__`.
  return Object.fromEntries(counts);
}
|
|
@ -0,0 +1,23 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import { frequentItems } from '../../../common/__mocks__/artificial_logs/frequent_items';
|
||||
import { getValuesDescending } from './get_values_descending';
|
||||
|
||||
describe('getValuesDescending', () => {
  it('get descending values for field response_code', () => {
    const values = getValuesDescending(frequentItems, 'response_code');

    expect(values).toEqual(['500', '200', '404']);
  });

  it('get descending values for field url', () => {
    const values = getValuesDescending(frequentItems, 'url');

    expect(values).toEqual(['home.php']);
  });

  it('get descending values for field user', () => {
    const values = getValuesDescending(frequentItems, 'user');

    expect(values).toEqual(['Peter', 'Mary', 'Paul']);
  });
});
|
|
@ -0,0 +1,19 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import type { ItemsetResult } from '../../../common/types';
|
||||
|
||||
import { getValueCounts } from './get_value_counts';
|
||||
|
||||
/**
 * List the values of `field` ordered by how often they occur, most frequent first.
 *
 * @param df - Item sets passed on to `getValueCounts`.
 * @param field - Name of the field whose values are ranked.
 * @returns The keys of the value counts, sorted by count in descending order.
 */
export function getValuesDescending(df: ItemsetResult[], field: string): string[] {
  const countsByValue = getValueCounts(df, field);

  const byCountDescending = (left: string, right: string) =>
    countsByValue[right] - countsByValue[left];

  return Object.keys(countsByValue).sort(byCountDescending);
}
|
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import { changePointGroups } from '../../../common/__mocks__/artificial_logs/change_point_groups';
|
||||
import { changePoints } from '../../../common/__mocks__/artificial_logs/change_points';
|
||||
|
||||
import { duplicateIdentifier } from './duplicate_identifier';
|
||||
import { getGroupsWithReaddedDuplicates } from './get_groups_with_readded_duplicates';
|
||||
import { dropDuplicates, groupDuplicates } from './fetch_frequent_items';
|
||||
import { getFieldValuePairCounts } from './get_field_value_pair_counts';
|
||||
import { getMarkedDuplicates } from './get_marked_duplicates';
|
||||
import { getMissingChangePoints } from './get_missing_change_points';
|
||||
import { transformChangePointToGroup } from './transform_change_point_to_group';
|
||||
|
||||
// The suite is named after the function whose result is asserted below
// (`transformChangePointToGroup`), so a failure is reported under the right name.
describe('transformChangePointToGroup', () => {
  it('transforms a missing change point to a change point group', () => {
    // Prepare the inputs by running the mocks through the preceding helper functions.
    const deduplicatedChangePoints = dropDuplicates(changePoints, duplicateIdentifier);

    // Only duplicate groups with more than one entry are kept.
    const groupedChangePoints = groupDuplicates(changePoints, duplicateIdentifier).filter(
      (g) => g.group.length > 1
    );

    const fieldValuePairCounts = getFieldValuePairCounts(changePointGroups);
    const markedDuplicates = getMarkedDuplicates(changePointGroups, fieldValuePairCounts);
    const groupsWithReaddedDuplicates = getGroupsWithReaddedDuplicates(
      markedDuplicates,
      groupedChangePoints
    );

    const missingChangePoints = getMissingChangePoints(
      deduplicatedChangePoints,
      groupsWithReaddedDuplicates
    );

    // The function under test: turn the first missing change point into a group.
    const transformed = transformChangePointToGroup(missingChangePoints[0], groupedChangePoints);

    expect(transformed).toEqual({
      docCount: 1981,
      group: [{ duplicate: false, fieldName: 'user', fieldValue: 'Peter' }],
      id: '817080373',
      pValue: 2.7454255728359757e-21,
    });
  });
});
|
|
@ -0,0 +1,55 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import { stringHash } from '@kbn/ml-string-hash';
|
||||
import type { ChangePoint } from '@kbn/ml-agg-utils';
|
||||
|
||||
import type { ChangePointDuplicateGroup } from '../../../common/types';
|
||||
|
||||
/**
 * Turn a single change point into a change point group.
 *
 * If the change point's field/value pair is part of one of the given
 * duplicate groups, the resulting group contains every pair of that duplicate
 * group and its id is the hash of the serialized list of pairs. Otherwise the
 * group contains only the change point's own pair and the id is the hash of
 * that single serialized pair. Every pair in the result has `duplicate: false`.
 *
 * @param changePoint - Change point providing the pair, doc count and p-value.
 * @param groupedChangePoints - Duplicate groups searched for the pair.
 * @returns A group object with `id`, `group`, `docCount` and `pValue`.
 */
export function transformChangePointToGroup(
  changePoint: ChangePoint,
  groupedChangePoints: ChangePointDuplicateGroup[]
) {
  const { fieldName, fieldValue, doc_count: docCount, pValue } = changePoint;

  const matchingDuplicateGroup = groupedChangePoints.find((candidate) =>
    candidate.group.some(
      (pair) => pair.fieldName === fieldName && pair.fieldValue === fieldValue
    )
  );

  if (matchingDuplicateGroup === undefined) {
    // No duplicate group: the id is based on the single pair object, not on an array.
    return {
      id: `${stringHash(JSON.stringify({ fieldName, fieldValue }))}`,
      group: [{ fieldName, fieldValue, duplicate: false }],
      docCount,
      pValue,
    };
  }

  // Reduce the duplicate group to bare field/value pairs; these feed both the id and the group.
  const fieldValuePairs = matchingDuplicateGroup.group.map((d) => ({
    fieldName: d.fieldName,
    fieldValue: d.fieldValue,
  }));

  return {
    id: `${stringHash(JSON.stringify(fieldValuePairs))}`,
    group: fieldValuePairs.map((pair) => ({ ...pair, duplicate: false })),
    docCount,
    pValue,
  };
}
|
|
@ -5,6 +5,12 @@
|
|||
* 2.0.
|
||||
*/
|
||||
|
||||
// We're using the mocks for jest unit tests as expected data in the integration tests here.
// This makes sure that, should the assertions for the integration tests need to be updated,
// the jest unit tests also use mocks that are not outdated.
|
||||
import { changePoints as artificialLogChangePoints } from '@kbn/aiops-plugin/common/__mocks__/artificial_logs/change_points';
|
||||
import { finalChangePointGroups as artificialLogsChangePointGroups } from '@kbn/aiops-plugin/common/__mocks__/artificial_logs/final_change_point_groups';
|
||||
|
||||
import type { TestData } from './types';
|
||||
|
||||
export const explainLogRateSpikesTestData: TestData[] = [
|
||||
|
@ -86,70 +92,8 @@ export const explainLogRateSpikesTestData: TestData[] = [
|
|||
groupHistogramFilter: 'add_change_point_group_histogram',
|
||||
histogramFilter: 'add_change_points_histogram',
|
||||
errorFilter: 'add_error',
|
||||
changePoints: [
|
||||
{
|
||||
fieldName: 'response_code',
|
||||
fieldValue: '500',
|
||||
doc_count: 1821,
|
||||
bg_count: 553,
|
||||
total_doc_count: 4671,
|
||||
total_bg_count: 1975,
|
||||
score: 26.546201745993947,
|
||||
pValue: 2.9589053032077285e-12,
|
||||
normalizedScore: 0.7814127409489161,
|
||||
},
|
||||
{
|
||||
fieldName: 'url',
|
||||
fieldValue: 'home.php',
|
||||
doc_count: 1742,
|
||||
bg_count: 632,
|
||||
total_doc_count: 4671,
|
||||
total_bg_count: 1975,
|
||||
score: 4.53094842981472,
|
||||
pValue: 0.010770456205312423,
|
||||
normalizedScore: 0.10333028878375965,
|
||||
},
|
||||
{
|
||||
fieldName: 'url',
|
||||
fieldValue: 'login.php',
|
||||
doc_count: 1742,
|
||||
bg_count: 632,
|
||||
total_doc_count: 4671,
|
||||
total_bg_count: 1975,
|
||||
score: 4.53094842981472,
|
||||
pValue: 0.010770456205312423,
|
||||
normalizedScore: 0.10333028878375965,
|
||||
},
|
||||
{
|
||||
fieldName: 'user',
|
||||
fieldValue: 'Peter',
|
||||
doc_count: 1981,
|
||||
bg_count: 553,
|
||||
total_doc_count: 4671,
|
||||
total_bg_count: 1975,
|
||||
score: 47.34435085428873,
|
||||
pValue: 2.7454255728359757e-21,
|
||||
normalizedScore: 0.8327337555873047,
|
||||
},
|
||||
],
|
||||
groups: [
|
||||
{
|
||||
id: '2038579476',
|
||||
group: [
|
||||
{ fieldName: 'response_code', fieldValue: '500', duplicate: false },
|
||||
{ fieldName: 'url', fieldValue: 'home.php', duplicate: false },
|
||||
{ fieldName: 'url', fieldValue: 'login.php', duplicate: false },
|
||||
],
|
||||
docCount: 792,
|
||||
pValue: 0.010770456205312423,
|
||||
},
|
||||
{
|
||||
id: '817080373',
|
||||
group: [{ fieldName: 'user', fieldValue: 'Peter', duplicate: false }],
|
||||
docCount: 1981,
|
||||
pValue: 2.7454255728359757e-21,
|
||||
},
|
||||
],
|
||||
changePoints: artificialLogChangePoints,
|
||||
groups: artificialLogsChangePointGroups,
|
||||
histogramLength: 20,
|
||||
},
|
||||
},
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue