[ML] Explain Log Rate Spikes: Fix grouping edge cases. (#140891)

- Change point groups might miss individual change points that were not returned by the `frequent_items` agg as part of groups. This PR now adds each missing one as an individual additional group.
- Only return groups if there's at least one group with more than one item, otherwise fall back to basic table with significant terms.
- Changes the UI behaviour to show the regular table by default and the grouping switch set to off.
- Adds `p-value` column to grouped table and defaults to sorting by that column similar to table with individual items.
This commit is contained in:
Walter Rafelsberger 2022-09-20 17:37:26 +02:00 committed by GitHub
parent 45649ced6a
commit 706d3defdd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 504 additions and 118 deletions

View file

@ -97,4 +97,5 @@ interface ChangePointGroupItem extends FieldValuePair {
/**
 * A group of change point items together with a document count and a p-value.
 */
export interface ChangePointGroup {
// The items that make up this group.
group: ChangePointGroupItem[];
// Document count reported for the group.
docCount: number;
// p-value reported for the group; `null` is a permitted value.
pValue: number | null;
}

View file

@ -74,7 +74,7 @@ export const ExplainLogRateSpikesAnalysis: FC<ExplainLogRateSpikesAnalysisProps>
const [currentAnalysisWindowParameters, setCurrentAnalysisWindowParameters] = useState<
WindowParameters | undefined
>();
const [groupResults, setGroupResults] = useState<boolean>(true);
const [groupResults, setGroupResults] = useState<boolean>(false);
const onSwitchToggle = (e: { target: { checked: React.SetStateAction<boolean> } }) => {
setGroupResults(e.target.checked);
@ -106,6 +106,9 @@ export const ExplainLogRateSpikesAnalysis: FC<ExplainLogRateSpikesAnalysisProps>
// Start handler clears possibly hovered or pinned
// change points on analysis refresh.
function startHandler() {
// Reset grouping to false when restarting the analysis.
setGroupResults(false);
if (onPinnedChangePoint) {
onPinnedChangePoint(null);
}
@ -124,7 +127,7 @@ export const ExplainLogRateSpikesAnalysis: FC<ExplainLogRateSpikesAnalysisProps>
}, []);
const groupTableItems = useMemo(() => {
const tableItems = data.changePointsGroups.map(({ group, docCount }, index) => {
const tableItems = data.changePointsGroups.map(({ group, docCount, pValue }, index) => {
const sortedGroup = group.sort((a, b) =>
a.fieldName > b.fieldName ? 1 : b.fieldName > a.fieldName ? -1 : 0
);
@ -143,6 +146,7 @@ export const ExplainLogRateSpikesAnalysis: FC<ExplainLogRateSpikesAnalysisProps>
return {
id: index,
docCount,
pValue,
group: dedupedGroup,
repeatedValues,
};
@ -162,8 +166,7 @@ export const ExplainLogRateSpikesAnalysis: FC<ExplainLogRateSpikesAnalysisProps>
const groupItemCount = groupTableItems.reduce((p, c) => {
return p + Object.keys(c.group).length;
}, 0);
const foundGroups =
groupTableItems.length === 0 || (groupTableItems.length > 0 && groupItemCount > 0);
const foundGroups = groupTableItems.length > 0 && groupItemCount > 0;
return (
<div data-test-subj="aiopsExplainLogRateSpikesAnalysis">
@ -178,6 +181,7 @@ export const ExplainLogRateSpikesAnalysis: FC<ExplainLogRateSpikesAnalysisProps>
{showSpikeAnalysisTable && foundGroups && (
<EuiFormRow display="columnCompressedSwitch" label={groupResultsMessage}>
<EuiSwitch
data-test-subj={`aiopsExplainLogRateSpikesGroupSwitch${groupResults ? ' checked' : ''}`}
showLabel={false}
label={''}
checked={groupResults}

View file

@ -32,6 +32,7 @@ import { getFailedTransactionsCorrelationImpactLabel } from './get_failed_transa
const NARROW_COLUMN_WIDTH = '120px';
const ACTIONS_COLUMN_WIDTH = '60px';
const NOT_AVAILABLE = '--';
const PAGINATION_SIZE_OPTIONS = [5, 10, 20, 50];
const DEFAULT_SORT_FIELD = 'pValue';
@ -129,19 +130,17 @@ export const SpikeAnalysisTable: FC<SpikeAnalysisTableProps> = ({
{
'data-test-subj': 'aiopsSpikeAnalysisTableColumnFieldName',
field: 'fieldName',
name: i18n.translate(
'xpack.aiops.correlations.failedTransactions.correlationsTable.fieldNameLabel',
{ defaultMessage: 'Field name' }
),
name: i18n.translate('xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.fieldNameLabel', {
defaultMessage: 'Field name',
}),
sortable: true,
},
{
'data-test-subj': 'aiopsSpikeAnalysisTableColumnFieldValue',
field: 'fieldValue',
name: i18n.translate(
'xpack.aiops.correlations.failedTransactions.correlationsTable.fieldValueLabel',
{ defaultMessage: 'Field value' }
),
name: i18n.translate('xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.fieldValueLabel', {
defaultMessage: 'Field value',
}),
render: (_, { fieldValue }) => String(fieldValue).slice(0, 50),
sortable: true,
},
@ -153,7 +152,7 @@ export const SpikeAnalysisTable: FC<SpikeAnalysisTableProps> = ({
<EuiToolTip
position="top"
content={i18n.translate(
'xpack.aiops.correlations.failedTransactions.correlationsTable.logRateColumnTooltip',
'xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.logRateColumnTooltip',
{
defaultMessage:
'A visual representation of the impact of the field on the message rate difference',
@ -162,7 +161,7 @@ export const SpikeAnalysisTable: FC<SpikeAnalysisTableProps> = ({
>
<>
<FormattedMessage
id="xpack.aiops.correlations.failedTransactions.correlationsTable.logRateLabel"
id="xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.logRateLabel"
defaultMessage="Log rate"
/>
<EuiIcon size="s" color="subdued" type="questionInCircle" className="eui-alignTop" />
@ -178,6 +177,15 @@ export const SpikeAnalysisTable: FC<SpikeAnalysisTableProps> = ({
),
sortable: false,
},
{
'data-test-subj': 'aiopsSpikeAnalysisTableColumnDocCount',
width: NARROW_COLUMN_WIDTH,
field: 'doc_count',
name: i18n.translate('xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.docCountLabel', {
defaultMessage: 'Doc count',
}),
sortable: true,
},
{
'data-test-subj': 'aiopsSpikeAnalysisTableColumnPValue',
width: NARROW_COLUMN_WIDTH,
@ -186,7 +194,7 @@ export const SpikeAnalysisTable: FC<SpikeAnalysisTableProps> = ({
<EuiToolTip
position="top"
content={i18n.translate(
'xpack.aiops.correlations.failedTransactions.correlationsTable.pValueColumnTooltip',
'xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.pValueColumnTooltip',
{
defaultMessage:
'The significance of changes in the frequency of values; lower values indicate greater change',
@ -195,14 +203,14 @@ export const SpikeAnalysisTable: FC<SpikeAnalysisTableProps> = ({
>
<>
<FormattedMessage
id="xpack.aiops.correlations.failedTransactions.correlationsTable.pValueLabel"
id="xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.pValueLabel"
defaultMessage="p-value"
/>
<EuiIcon size="s" color="subdued" type="questionInCircle" className="eui-alignTop" />
</>
</EuiToolTip>
),
render: (pValue: number) => pValue.toPrecision(3),
render: (pValue: number | null) => pValue?.toPrecision(3) ?? NOT_AVAILABLE,
sortable: true,
},
{
@ -213,7 +221,7 @@ export const SpikeAnalysisTable: FC<SpikeAnalysisTableProps> = ({
<EuiToolTip
position="top"
content={i18n.translate(
'xpack.aiops.correlations.failedTransactions.correlationsTable.impactLabelColumnTooltip',
'xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.impactLabelColumnTooltip',
{
defaultMessage: 'The level of impact of the field on the message rate difference',
}
@ -221,7 +229,7 @@ export const SpikeAnalysisTable: FC<SpikeAnalysisTableProps> = ({
>
<>
<FormattedMessage
id="xpack.aiops.correlations.failedTransactions.correlationsTable.impactLabel"
id="xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.impactLabel"
defaultMessage="Impact"
/>
<EuiIcon size="s" color="subdued" type="questionInCircle" className="eui-alignTop" />

View file

@ -130,19 +130,17 @@ export const SpikeAnalysisTableExpandedRow: FC<SpikeAnalysisTableExpandedRowProp
{
'data-test-subj': 'aiopsSpikeAnalysisTableColumnFieldName',
field: 'fieldName',
name: i18n.translate(
'xpack.aiops.correlations.failedTransactions.correlationsTable.fieldNameLabel',
{ defaultMessage: 'Field name' }
),
name: i18n.translate('xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.fieldNameLabel', {
defaultMessage: 'Field name',
}),
sortable: true,
},
{
'data-test-subj': 'aiopsSpikeAnalysisTableColumnFieldValue',
field: 'fieldValue',
name: i18n.translate(
'xpack.aiops.correlations.failedTransactions.correlationsTable.fieldValueLabel',
{ defaultMessage: 'Field value' }
),
name: i18n.translate('xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.fieldValueLabel', {
defaultMessage: 'Field value',
}),
render: (_, { fieldValue }) => String(fieldValue).slice(0, 50),
sortable: true,
},
@ -154,7 +152,7 @@ export const SpikeAnalysisTableExpandedRow: FC<SpikeAnalysisTableExpandedRowProp
<EuiToolTip
position="top"
content={i18n.translate(
'xpack.aiops.correlations.failedTransactions.correlationsTable.logRateColumnTooltip',
'xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.logRateColumnTooltip',
{
defaultMessage:
'A visual representation of the impact of the field on the message rate difference',
@ -163,7 +161,7 @@ export const SpikeAnalysisTableExpandedRow: FC<SpikeAnalysisTableExpandedRowProp
>
<>
<FormattedMessage
id="xpack.aiops.correlations.failedTransactions.correlationsTable.logRateLabel"
id="xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.logRateLabel"
defaultMessage="Log rate"
/>
<EuiIcon size="s" color="subdued" type="questionInCircle" className="eui-alignTop" />
@ -190,7 +188,7 @@ export const SpikeAnalysisTableExpandedRow: FC<SpikeAnalysisTableExpandedRowProp
<EuiToolTip
position="top"
content={i18n.translate(
'xpack.aiops.correlations.failedTransactions.correlationsTable.pValueColumnTooltip',
'xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.pValueColumnTooltip',
{
defaultMessage:
'The significance of changes in the frequency of values; lower values indicate greater change',
@ -199,14 +197,14 @@ export const SpikeAnalysisTableExpandedRow: FC<SpikeAnalysisTableExpandedRowProp
>
<>
<FormattedMessage
id="xpack.aiops.correlations.failedTransactions.correlationsTable.pValueLabel"
id="xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.pValueLabel"
defaultMessage="p-value"
/>
<EuiIcon size="s" color="subdued" type="questionInCircle" className="eui-alignTop" />
</>
</EuiToolTip>
),
render: (pValue: number) => pValue?.toPrecision(3) ?? NOT_AVAILABLE,
render: (pValue: number | null) => pValue?.toPrecision(3) ?? NOT_AVAILABLE,
sortable: true,
},
{
@ -217,7 +215,7 @@ export const SpikeAnalysisTableExpandedRow: FC<SpikeAnalysisTableExpandedRowProp
<EuiToolTip
position="top"
content={i18n.translate(
'xpack.aiops.correlations.failedTransactions.correlationsTable.impactLabelColumnTooltip',
'xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.impactLabelColumnTooltip',
{
defaultMessage: 'The level of impact of the field on the message rate difference',
}
@ -225,7 +223,7 @@ export const SpikeAnalysisTableExpandedRow: FC<SpikeAnalysisTableExpandedRowProp
>
<>
<FormattedMessage
id="xpack.aiops.correlations.failedTransactions.correlationsTable.impactLabel"
id="xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.impactLabel"
defaultMessage="Impact"
/>
<EuiIcon size="s" color="subdued" type="questionInCircle" className="eui-alignTop" />

View file

@ -13,23 +13,32 @@ import {
EuiBasicTable,
EuiBasicTableColumn,
EuiButtonIcon,
EuiIcon,
EuiScreenReaderOnly,
EuiSpacer,
EuiTableSortingType,
EuiToolTip,
RIGHT_ALIGNMENT,
} from '@elastic/eui';
import { i18n } from '@kbn/i18n';
import { FormattedMessage } from '@kbn/i18n-react';
import type { ChangePoint } from '@kbn/ml-agg-utils';
import { useEuiTheme } from '../../hooks/use_eui_theme';
import { SpikeAnalysisTableExpandedRow } from './spike_analysis_table_expanded_row';
const NARROW_COLUMN_WIDTH = '120px';
const EXPAND_COLUMN_WIDTH = '40px';
const NOT_AVAILABLE = '--';
const PAGINATION_SIZE_OPTIONS = [5, 10, 20, 50];
const DEFAULT_SORT_FIELD = 'docCount';
const DEFAULT_SORT_DIRECTION = 'desc';
const DEFAULT_SORT_FIELD = 'pValue';
const DEFAULT_SORT_DIRECTION = 'asc';
/**
 * Row model for the groups table (used as the item type of the table columns).
 */
interface GroupTableItem {
// Numeric row identifier.
id: number;
// Document count shown in the "Doc count" column.
docCount: number;
// p-value shown in the "p-value" column; a placeholder is rendered when it is `null`.
pValue: number | null;
// Entries of the group, iterated by field name when rendering the "Group" column.
group: Record<string, any>;
// Additional entries; only their number is shown in the "+N more" badge.
// NOTE(review): presumably field/value pairs that also occur in other groups — confirm against the caller.
repeatedValues: Record<string, any>;
}
@ -107,7 +116,7 @@ export const SpikeAnalysisGroupsTable: FC<SpikeAnalysisTableProps> = ({
const columns: Array<EuiBasicTableColumn<GroupTableItem>> = [
{
align: RIGHT_ALIGNMENT,
width: '40px',
width: EXPAND_COLUMN_WIDTH,
isExpander: true,
name: (
<EuiScreenReaderOnly>
@ -126,10 +135,9 @@ export const SpikeAnalysisGroupsTable: FC<SpikeAnalysisTableProps> = ({
{
'data-test-subj': 'aiopsSpikeAnalysisGroupsTableColumnGroup',
field: 'group',
name: i18n.translate(
'xpack.aiops.correlations.failedTransactions.correlationsTable.groupLabel',
{ defaultMessage: 'Group' }
),
name: i18n.translate('xpack.aiops.explainLogRateSpikes.spikeAnalysisTableGroups.groupLabel', {
defaultMessage: 'Group',
}),
render: (_, { group, repeatedValues }) => {
const valuesBadges = [];
for (const fieldName in group) {
@ -159,7 +167,11 @@ export const SpikeAnalysisGroupsTable: FC<SpikeAnalysisTableProps> = ({
data-test-subj="aiopsSpikeAnalysisGroupsTableColumnGroupBadge"
color="hollow"
>
+{Object.keys(repeatedValues).length} more
+{Object.keys(repeatedValues).length}{' '}
<FormattedMessage
id="xpack.aiops.explainLogRateSpikes.spikeAnalysisTableGroups.moreLabel"
defaultMessage="more"
/>
</EuiBadge>
<EuiSpacer size="xs" />
</>
@ -170,10 +182,37 @@ export const SpikeAnalysisGroupsTable: FC<SpikeAnalysisTableProps> = ({
sortable: false,
textOnly: true,
},
{
'data-test-subj': 'aiopsSpikeAnalysisGroupsTableColumnPValue',
width: NARROW_COLUMN_WIDTH,
field: 'pValue',
name: (
<EuiToolTip
position="top"
content={i18n.translate(
'xpack.aiops.explainLogRateSpikes.spikeAnalysisTableGroups.pValueColumnTooltip',
{
defaultMessage:
'The significance of changes in the frequency of values; lower values indicate greater change',
}
)}
>
<>
<FormattedMessage
id="xpack.aiops.explainLogRateSpikes.spikeAnalysisTableGroups.pValueLabel"
defaultMessage="p-value"
/>
<EuiIcon size="s" color="subdued" type="questionInCircle" className="eui-alignTop" />
</>
</EuiToolTip>
),
render: (pValue: number | null) => pValue?.toPrecision(3) ?? NOT_AVAILABLE,
sortable: true,
},
{
'data-test-subj': 'aiopsSpikeAnalysisGroupsTableColumnDocCount',
field: 'docCount',
name: i18n.translate('xpack.aiops.correlations.correlationsTable.docCountLabel', {
name: i18n.translate('xpack.aiops.correlations.spikeAnalysisTableGroups.docCountLabel', {
defaultMessage: 'Doc count',
}),
sortable: true,

View file

@ -35,8 +35,14 @@ import type { AiopsLicense } from '../types';
import { fetchChangePointPValues } from './queries/fetch_change_point_p_values';
import { fetchFieldCandidates } from './queries/fetch_field_candidates';
import { fetchFrequentItems } from './queries/fetch_frequent_items';
import {
dropDuplicates,
fetchFrequentItems,
groupDuplicates,
} from './queries/fetch_frequent_items';
import type { ItemsetResult } from './queries/fetch_frequent_items';
import {
getFieldValuePairCounts,
getSimpleHierarchicalTree,
getSimpleHierarchicalTreeLeaves,
markDuplicates,
@ -211,31 +217,147 @@ export const defineExplainLogRateSpikesRoute = (
}
if (groupingEnabled) {
// To optimize the `frequent_items` query, we identify duplicate change points by count attributes.
// Note this is a compromise and not 100% accurate because there could be change points that
// have the exact same counts but still don't co-occur.
const duplicateIdentifier: Array<keyof ChangePoint> = [
'doc_count',
'bg_count',
'total_doc_count',
'total_bg_count',
];
// These are the deduplicated change points we pass to the `frequent_items` aggregation.
const deduplicatedChangePoints = dropDuplicates(changePoints, duplicateIdentifier);
// We use the grouped change points to later repopulate
// the `frequent_items` result with the missing duplicates.
const groupedChangePoints = groupDuplicates(changePoints, duplicateIdentifier).filter(
(g) => g.group.length > 1
);
const { fields, df } = await fetchFrequentItems(
client,
request.body.index,
JSON.parse(request.body.searchQuery) as estypes.QueryDslQueryContainer,
changePoints,
deduplicatedChangePoints,
request.body.timeFieldName,
request.body.deviationMin,
request.body.deviationMax
);
// Filter itemsets by significant change point field value pairs
const filteredDf = df.filter((fi) => {
const { set: currentItems } = fi;
// The way the `frequent_items` aggregation works could return item sets that include
// field/value pairs that are not part of the original list of significant change points.
// This cleans up groups and removes those unrelated field/value pairs.
const filteredDf = df
.map((fi) => {
fi.set = Object.entries(fi.set).reduce<ItemsetResult['set']>(
(set, [field, value]) => {
if (
changePoints.some((cp) => cp.fieldName === field && cp.fieldValue === value)
) {
set[field] = value;
}
return set;
},
{}
);
fi.size = Object.keys(fi.set).length;
return fi;
})
.filter((fi) => fi.size > 1);
return Object.entries(currentItems).every(([key, value]) => {
return changePoints.some((cp) => {
return cp.fieldName === key && cp.fieldValue === value;
});
// `frequent_items` returns a lot of different small groups of field/value pairs that co-occur.
// The following steps analyse these small groups, identify overlap between these groups,
// and then summarize them in larger groups where possible.
// Get a tree structure based on `frequent_items`.
const { root } = getSimpleHierarchicalTree(filteredDf, true, false, fields);
// Each leaf of the tree will be a summarized group of co-occurring field/value pairs.
const treeLeaves = getSimpleHierarchicalTreeLeaves(root, []);
// To be able to display a more cleaned up results table in the UI, we identify field/value pairs
// that occur in multiple groups. This will allow us to highlight field/value pairs that are
// unique to a group in a better way. This step will also re-add duplicates we identified in the
// beginning and didn't pass on to the `frequent_items` agg.
const fieldValuePairCounts = getFieldValuePairCounts(treeLeaves);
const changePointGroups = markDuplicates(treeLeaves, fieldValuePairCounts).map((g) => {
const group = [...g.group];
for (const groupItem of g.group) {
const { duplicate } = groupItem;
const duplicates = groupedChangePoints.find((d) =>
d.group.some(
(dg) =>
dg.fieldName === groupItem.fieldName && dg.fieldValue === groupItem.fieldValue
)
);
if (duplicates !== undefined) {
group.push(
...duplicates.group.map((d) => {
return {
fieldName: d.fieldName,
fieldValue: d.fieldValue,
duplicate,
};
})
);
}
}
return {
...g,
group,
};
});
// Some field/value pairs might not be part of the `frequent_items` result set, for example
// because they don't co-occur with other field/value pairs or because of the limits we set on the query.
// In this next part we identify those missing pairs and add them as individual groups.
const missingChangePoints = deduplicatedChangePoints.filter((cp) => {
return !changePointGroups.some((cpg) => {
return cpg.group.some(
(d) => d.fieldName === cp.fieldName && d.fieldValue === cp.fieldValue
);
});
});
const { root } = getSimpleHierarchicalTree(filteredDf, true, false, fields);
const changePointsGroups = getSimpleHierarchicalTreeLeaves(root, []);
changePointGroups.push(
...missingChangePoints.map((cp) => {
const duplicates = groupedChangePoints.find((d) =>
d.group.some(
(dg) => dg.fieldName === cp.fieldName && dg.fieldValue === cp.fieldValue
)
);
if (duplicates !== undefined) {
return {
group: duplicates.group.map((d) => ({
fieldName: d.fieldName,
fieldValue: d.fieldValue,
duplicate: false,
})),
docCount: cp.doc_count,
pValue: cp.pValue,
};
} else {
return {
group: [{ fieldName: cp.fieldName, fieldValue: cp.fieldValue, duplicate: false }],
docCount: cp.doc_count,
pValue: cp.pValue,
};
}
})
);
push(addChangePointsGroupAction(markDuplicates(changePointsGroups)));
// Finally, we'll find out if there's at least one group with at least two items,
// only then will we return the groups to the clients and make the grouping option available.
const maxItems = Math.max(...changePointGroups.map((g) => g.group.length));
if (maxItems > 1) {
push(addChangePointsGroupAction(changePointGroups));
}
}
const histogramFields: [NumericHistogramField] = [

View file

@ -10,7 +10,7 @@ import { uniq, uniqWith, pick, isEqual } from 'lodash';
import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import type { ElasticsearchClient } from '@kbn/core-elasticsearch-server';
import type { ChangePoint } from '@kbn/ml-agg-utils';
import type { ChangePoint, FieldValuePair } from '@kbn/ml-agg-utils';
interface FrequentItemsAggregation extends estypes.AggregationsSamplerAggregation {
fi: {
@ -18,8 +18,32 @@ interface FrequentItemsAggregation extends estypes.AggregationsSamplerAggregatio
};
}
function dropDuplicates(cp: ChangePoint[], uniqueFields: string[]) {
return uniqWith(cp, (a, b) => isEqual(pick(a, uniqueFields), pick(b, uniqueFields)));
export function dropDuplicates(cps: ChangePoint[], uniqueFields: Array<keyof ChangePoint>) {
return uniqWith(cps, (a, b) => isEqual(pick(a, uniqueFields), pick(b, uniqueFields)));
}
/**
 * One bucket produced by `groupDuplicates`: change points whose compared
 * attributes are deeply equal.
 */
interface ChangePointDuplicateGroup {
// The attributes picked from the first change point of the bucket; later
// change points are compared against these to decide membership.
keys: Pick<ChangePoint, keyof ChangePoint>;
// All change points that share `keys`, in the order they were encountered.
group: ChangePoint[];
}
/**
 * Buckets change points by the values of the given attributes.
 *
 * Two change points land in the same bucket when the attributes listed in
 * `uniqueFields` are deeply equal. Buckets are returned in order of first
 * occurrence, and each bucket keeps its members in input order.
 *
 * @param cps - The change points to bucket.
 * @param uniqueFields - The attribute names used to compare change points.
 * @returns One entry per distinct attribute combination.
 */
export function groupDuplicates(cps: ChangePoint[], uniqueFields: Array<keyof ChangePoint>) {
  const duplicateGroups: ChangePointDuplicateGroup[] = [];

  cps.forEach((changePoint) => {
    const keys = pick(changePoint, uniqueFields);
    const existingGroup = duplicateGroups.find((candidate) => isEqual(candidate.keys, keys));

    if (existingGroup !== undefined) {
      existingGroup.group.push(changePoint);
      return;
    }

    duplicateGroups.push({ keys, group: [changePoint] });
  });

  return duplicateGroups;
}
export async function fetchFrequentItems(
@ -31,17 +55,8 @@ export async function fetchFrequentItems(
deviationMin: number,
deviationMax: number
) {
// first remove duplicates in sig terms - note this is not strictly perfect as there could
// be conincidentally equal counts, but in general is ok...
const terms = dropDuplicates(changePoints, [
'doc_count',
'bg_count',
'total_doc_count',
'total_bg_count',
]);
// get unique fields that are left
const fields = [...new Set(terms.map((t) => t.fieldName))];
const fields = [...new Set(changePoints.map((t) => t.fieldName))];
// TODO add query params
const query = {
@ -58,7 +73,7 @@ export async function fetchFrequentItems(
},
},
],
should: terms.map((t) => {
should: changePoints.map((t) => {
return { term: { [t.fieldName]: t.fieldValue } };
}),
},
@ -68,7 +83,7 @@ export async function fetchFrequentItems(
field,
}));
const totalDocCount = terms[0].total_doc_count;
const totalDocCount = changePoints[0].total_doc_count;
const minDocCount = 50000;
let sampleProbability = 1;
@ -88,7 +103,7 @@ export async function fetchFrequentItems(
frequent_items: {
minimum_set_size: 2,
size: 200,
minimum_support: 0.1,
minimum_support: 0.01,
fields: aggFields,
},
},
@ -153,7 +168,7 @@ export async function fetchFrequentItems(
return;
}
result.size = Object.keys(result).length;
result.size = Object.keys(result.set).length;
result.maxPValue = maxPValue;
result.doc_count = fis.doc_count;
result.support = fis.support;
@ -162,15 +177,21 @@ export async function fetchFrequentItems(
results.push(result);
});
results.sort((a, b) => {
return b.doc_count - a.doc_count;
});
const uniqueFields = uniq(results.flatMap((r) => Object.keys(r.set)));
return {
fields: uniq(results.flatMap((r) => Object.keys(r.set))),
fields: uniqueFields,
df: results,
totalDocCount: totalDocCountFi,
};
}
export interface ItemsetResult {
set: Record<string, string>;
set: Record<FieldValuePair['fieldName'], FieldValuePair['fieldValue']>;
size: number;
maxPValue: number;
doc_count: number;

View file

@ -0,0 +1,101 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import type { ChangePointGroup } from '@kbn/ml-agg-utils';
import { getFieldValuePairCounts, markDuplicates } from './get_simple_hierarchical_tree';
// Fixture: two groups that share the `custom_field.keyword: deviation` pair
// and differ in their `airline` value.
const changePointGroups: ChangePointGroup[] = [
{
group: [
{
fieldName: 'custom_field.keyword',
fieldValue: 'deviation',
},
{
fieldName: 'airline',
fieldValue: 'UAL',
},
],
docCount: 101,
pValue: 0.01,
},
{
group: [
{
fieldName: 'custom_field.keyword',
fieldValue: 'deviation',
},
{
fieldName: 'airline',
fieldValue: 'AAL',
},
],
docCount: 49,
pValue: 0.001,
},
];
// Specs for the field/value pair counting and duplicate marking helpers.
describe('get_simple_hierarchical_tree', () => {
describe('getFieldValuePairCounts', () => {
it('returns a nested record with field/value pair counts', () => {
const fieldValuePairCounts = getFieldValuePairCounts(changePointGroups);
// `deviation` is part of both fixture groups, each airline of only one.
expect(fieldValuePairCounts).toEqual({
airline: {
AAL: 1,
UAL: 1,
},
'custom_field.keyword': {
deviation: 2,
},
});
});
});
describe('markDuplicates', () => {
it('marks duplicates based on change point groups', () => {
const fieldValuePairCounts = getFieldValuePairCounts(changePointGroups);
const markedDuplicates = markDuplicates(changePointGroups, fieldValuePairCounts);
// Pairs counted more than once are expected to be flagged `duplicate: true`;
// `docCount` and `pValue` are expected to pass through unchanged.
expect(markedDuplicates).toEqual([
{
group: [
{
fieldName: 'custom_field.keyword',
fieldValue: 'deviation',
duplicate: true,
},
{
fieldName: 'airline',
fieldValue: 'UAL',
duplicate: false,
},
],
docCount: 101,
pValue: 0.01,
},
{
group: [
{
fieldName: 'custom_field.keyword',
fieldValue: 'deviation',
duplicate: true,
},
{
fieldName: 'airline',
fieldValue: 'AAL',
duplicate: false,
},
],
docCount: 49,
pValue: 0.001,
},
]);
});
});
});

View file

@ -12,13 +12,13 @@ import type { ChangePointGroup, FieldValuePair } from '@kbn/ml-agg-utils';
import type { ItemsetResult } from './fetch_frequent_items';
function getValueCounts(df: ItemsetResult[], field: string) {
return df.reduce((p, c) => {
return df.reduce<Record<string, number>>((p, c) => {
if (c.set[field] === undefined) {
return p;
}
p[c.set[field]] = p[c.set[field]] ? p[c.set[field]] + 1 : 1;
return p;
}, {} as Record<string, number>);
}, {});
}
function getValuesDescending(df: ItemsetResult[], field: string): string[] {
@ -34,6 +34,7 @@ interface NewNode {
name: string;
set: FieldValuePair[];
docCount: number;
pValue: number | null;
children: NewNode[];
icon: string;
iconStyle: string;
@ -51,6 +52,7 @@ function NewNodeFactory(name: string): NewNode {
name,
set: [],
docCount: 0,
pValue: 0,
children,
icon: 'default',
iconStyle: 'default',
@ -87,8 +89,8 @@ function dfDepthFirstSearch(
displayOther: boolean
) {
const filteredItemSets = iss.filter((is) => {
for (const [key, values] of Object.entries(is.set)) {
if (key === field && values.includes(value)) {
for (const [key, setValue] of Object.entries(is.set)) {
if (key === field && setValue === value) {
return true;
}
}
@ -100,6 +102,7 @@ function dfDepthFirstSearch(
}
const docCount = Math.max(...filteredItemSets.map((fis) => fis.doc_count));
const pValue = Math.max(...filteredItemSets.map((fis) => fis.maxPValue));
const totalDocCount = Math.max(...filteredItemSets.map((fis) => fis.total_doc_count));
let label = `${parentLabel} ${value}`;
@ -110,6 +113,7 @@ function dfDepthFirstSearch(
displayParent.name += ` ${value}`;
displayParent.set.push({ fieldName: field, fieldValue: value });
displayParent.docCount = docCount;
displayParent.pValue = pValue;
displayNode = displayParent;
} else {
displayNode = NewNodeFactory(`${docCount}/${totalDocCount}${label}`);
@ -117,6 +121,7 @@ function dfDepthFirstSearch(
displayNode.set = [...displayParent.set];
displayNode.set.push({ fieldName: field, fieldValue: value });
displayNode.docCount = docCount;
displayNode.pValue = pValue;
displayParent.addNode(displayNode);
}
@ -144,6 +149,7 @@ function dfDepthFirstSearch(
nextDisplayNode.iconStyle = 'warning';
nextDisplayNode.set = displayNode.set;
nextDisplayNode.docCount = docCount;
nextDisplayNode.pValue = pValue;
displayNode.addNode(nextDisplayNode);
displayNode = nextDisplayNode;
}
@ -226,7 +232,7 @@ export function getSimpleHierarchicalTreeLeaves(
) {
// console.log(`${'-'.repeat(level)} ${tree.name} ${tree.children.length}`);
if (tree.children.length === 0) {
leaves.push({ group: tree.set, docCount: tree.docCount });
leaves.push({ group: tree.set, docCount: tree.docCount, pValue: tree.pValue });
} else {
for (const child of tree.children) {
const newLeaves = getSimpleHierarchicalTreeLeaves(child, [], level + 1);
@ -236,29 +242,43 @@ export function getSimpleHierarchicalTreeLeaves(
}
}
if (leaves.length === 1 && leaves[0].group.length === 0 && leaves[0].docCount === 0) {
return [];
}
return leaves;
}
/**
 * Nested lookup of occurrence counts: field name -> field value -> count.
 */
type FieldValuePairCounts = Record<string, Record<string, number>>;

/**
 * Get a nested record of field/value pairs with counts.
 *
 * Counts how many times each field/value pair occurs across the items of all
 * given change point groups.
 *
 * @param cpgs - The change point groups whose items are counted.
 * @returns A record keyed by field name, then by field value, holding the
 * number of occurrences of that pair.
 */
export function getFieldValuePairCounts(cpgs: ChangePointGroup[]): FieldValuePairCounts {
  const hasOwn = (obj: object, key: PropertyKey): boolean =>
    Object.prototype.hasOwnProperty.call(obj, key);

  return cpgs.reduce<FieldValuePairCounts>((p, { group }) => {
    group.forEach(({ fieldName, fieldValue }) => {
      // Own-property checks are required here: field names and values come
      // from indexed data, and a name such as `constructor` or `toString`
      // would otherwise resolve to an inherited `Object.prototype` member
      // and be mistaken for an existing entry.
      if (!hasOwn(p, fieldName)) {
        p[fieldName] = {};
      }

      const valueCounts = p[fieldName];
      valueCounts[fieldValue] = hasOwn(valueCounts, fieldValue) ? valueCounts[fieldValue] + 1 : 1;
    });
    return p;
  }, {});
}
/**
* Analyse duplicate field/value pairs in change point groups.
*/
export function markDuplicates(cpgs: ChangePointGroup[]): ChangePointGroup[] {
const fieldValuePairCounts: Record<string, number> = {};
cpgs.forEach((cpg) => {
cpg.group.forEach((g) => {
const str = `${g.fieldName}$$$$${g.fieldValue}`;
fieldValuePairCounts[str] = fieldValuePairCounts[str] ? fieldValuePairCounts[str] + 1 : 1;
});
});
export function markDuplicates(
cpgs: ChangePointGroup[],
fieldValuePairCounts: FieldValuePairCounts
): ChangePointGroup[] {
return cpgs.map((cpg) => {
return {
...cpg,
group: cpg.group.map((g) => {
const str = `${g.fieldName}$$$$${g.fieldValue}`;
return {
...g,
duplicate: fieldValuePairCounts[str] > 1,
duplicate: fieldValuePairCounts[g.fieldName][g.fieldValue] > 1,
};
}),
};

View file

@ -6385,14 +6385,14 @@
"xpack.aiops.progressTitle": "Progression : {progress} % — {progressMessage}",
"xpack.aiops.searchPanel.totalDocCountLabel": "Total des documents : {strongTotalCount}",
"xpack.aiops.searchPanel.totalDocCountNumber": "{totalCount, plural, other {#}}",
"xpack.aiops.correlations.failedTransactions.correlationsTable.fieldNameLabel": "Nom du champ",
"xpack.aiops.correlations.failedTransactions.correlationsTable.fieldValueLabel": "Valeur du champ",
"xpack.aiops.correlations.failedTransactions.correlationsTable.impactLabel": "Impact",
"xpack.aiops.correlations.failedTransactions.correlationsTable.impactLabelColumnTooltip": "Le niveau d'impact du champ sur la différence de taux de messages",
"xpack.aiops.correlations.failedTransactions.correlationsTable.logRateColumnTooltip": "Une représentation visuelle de l'impact du champ sur la différence de taux de messages",
"xpack.aiops.correlations.failedTransactions.correlationsTable.logRateLabel": "Taux du log",
"xpack.aiops.correlations.failedTransactions.correlationsTable.pValueColumnTooltip": "L'importance de changements dans la fréquence des valeurs ; des valeurs plus faibles indiquent un changement plus important.",
"xpack.aiops.correlations.failedTransactions.correlationsTable.pValueLabel": "valeur-p",
"xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.fieldNameLabel": "Nom du champ",
"xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.fieldValueLabel": "Valeur du champ",
"xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.impactLabel": "Impact",
"xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.impactLabelColumnTooltip": "Le niveau d'impact du champ sur la différence de taux de messages",
"xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.logRateColumnTooltip": "Une représentation visuelle de l'impact du champ sur la différence de taux de messages",
"xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.logRateLabel": "Taux du log",
"xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.pValueColumnTooltip": "L'importance de changements dans la fréquence des valeurs ; des valeurs plus faibles indiquent un changement plus important.",
"xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.pValueLabel": "valeur-p",
"xpack.aiops.correlations.highImpactText": "Élevé",
"xpack.aiops.correlations.lowImpactText": "Bas",
"xpack.aiops.correlations.mediumImpactText": "Moyenne",

View file

@ -6379,14 +6379,14 @@
"xpack.aiops.progressTitle": "進行状況:{progress}% — {progressMessage}",
"xpack.aiops.searchPanel.totalDocCountLabel": "合計ドキュメント数:{strongTotalCount}",
"xpack.aiops.searchPanel.totalDocCountNumber": "{totalCount, plural, other {#}}",
"xpack.aiops.correlations.failedTransactions.correlationsTable.fieldNameLabel": "フィールド名",
"xpack.aiops.correlations.failedTransactions.correlationsTable.fieldValueLabel": "フィールド値",
"xpack.aiops.correlations.failedTransactions.correlationsTable.impactLabel": "インパクト",
"xpack.aiops.correlations.failedTransactions.correlationsTable.impactLabelColumnTooltip": "メッセージレート差異に対するフィールドの影響のレベル",
"xpack.aiops.correlations.failedTransactions.correlationsTable.logRateColumnTooltip": "メッセージレート差異に対するフィールドの影響の視覚的な表示",
"xpack.aiops.correlations.failedTransactions.correlationsTable.logRateLabel": "ログレート",
"xpack.aiops.correlations.failedTransactions.correlationsTable.pValueColumnTooltip": "値の頻度の変化の有意性。値が小さいほど、変化が大きいことを示します。",
"xpack.aiops.correlations.failedTransactions.correlationsTable.pValueLabel": "p値",
"xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.fieldNameLabel": "フィールド名",
"xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.fieldValueLabel": "フィールド値",
"xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.impactLabel": "インパクト",
"xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.impactLabelColumnTooltip": "メッセージレート差異に対するフィールドの影響のレベル",
"xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.logRateColumnTooltip": "メッセージレート差異に対するフィールドの影響の視覚的な表示",
"xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.logRateLabel": "ログレート",
"xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.pValueColumnTooltip": "値の頻度の変化の有意性。値が小さいほど、変化が大きいことを示します。",
"xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.pValueLabel": "p値",
"xpack.aiops.correlations.highImpactText": "高",
"xpack.aiops.correlations.lowImpactText": "低",
"xpack.aiops.correlations.mediumImpactText": "中",

View file

@ -6386,14 +6386,14 @@
"xpack.aiops.progressTitle": "进度:{progress}% — {progressMessage}",
"xpack.aiops.searchPanel.totalDocCountLabel": "文档总数:{strongTotalCount}",
"xpack.aiops.searchPanel.totalDocCountNumber": "{totalCount, plural, other {#}}",
"xpack.aiops.correlations.failedTransactions.correlationsTable.fieldNameLabel": "字段名称",
"xpack.aiops.correlations.failedTransactions.correlationsTable.fieldValueLabel": "字段值",
"xpack.aiops.correlations.failedTransactions.correlationsTable.impactLabel": "影响",
"xpack.aiops.correlations.failedTransactions.correlationsTable.impactLabelColumnTooltip": "字段对消息速率差异的影响水平",
"xpack.aiops.correlations.failedTransactions.correlationsTable.logRateColumnTooltip": "字段对消息速率差异的影响的视觉表示形式",
"xpack.aiops.correlations.failedTransactions.correlationsTable.logRateLabel": "日志速率",
"xpack.aiops.correlations.failedTransactions.correlationsTable.pValueColumnTooltip": "值的频率更改的意义;值越小表示变化越大",
"xpack.aiops.correlations.failedTransactions.correlationsTable.pValueLabel": "p-value",
"xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.fieldNameLabel": "字段名称",
"xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.fieldValueLabel": "字段值",
"xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.impactLabel": "影响",
"xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.impactLabelColumnTooltip": "字段对消息速率差异的影响水平",
"xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.logRateColumnTooltip": "字段对消息速率差异的影响的视觉表示形式",
"xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.logRateLabel": "日志速率",
"xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.pValueColumnTooltip": "值的频率更改的意义;值越小表示变化越大",
"xpack.aiops.explainLogRateSpikes.spikeAnalysisTable.pValueLabel": "p-value",
"xpack.aiops.correlations.highImpactText": "高",
"xpack.aiops.correlations.lowImpactText": "低",
"xpack.aiops.correlations.mediumImpactText": "中",

View file

@ -12,6 +12,7 @@ import type { TestData } from './types';
import { farequoteDataViewTestData } from './test_data';
export default function ({ getPageObject, getService }: FtrProviderContext) {
const es = getService('es');
const headerPage = getPageObject('header');
const elasticChart = getService('elasticChart');
const esArchiver = getService('esArchiver');
@ -114,6 +115,12 @@ export default function ({ getPageObject, getService }: FtrProviderContext) {
await aiops.explainLogRateSpikes.clickRerunAnalysisButton(true);
await aiops.explainLogRateSpikes.assertProgressTitle('Progress: 100% — Done.');
// The group switch should be disabled by default
await aiops.explainLogRateSpikes.assertSpikeAnalysisGroupSwitchExists(false);
// Enable grouping
await aiops.explainLogRateSpikes.clickSpikeAnalysisGroupSwitch(false);
await aiops.explainLogRateSpikesAnalysisGroupsTable.assertSpikeAnalysisTableExists();
const analysisGroupsTable =
@ -131,12 +138,51 @@ export default function ({ getPageObject, getService }: FtrProviderContext) {
}
// Failing: See https://github.com/elastic/kibana/issues/140848
describe.skip('explain log rate spikes', function () {
describe('explain log rate spikes', function () {
this.tags(['aiops']);
before(async () => {
await esArchiver.loadIfNeeded('x-pack/test/functional/es_archives/ml/farequote');
await ml.testResources.createIndexPatternIfNeeded('ft_farequote', '@timestamp');
await es.updateByQuery({
index: 'ft_farequote',
body: {
script: {
// @ts-expect-error
inline: 'ctx._source.custom_field = "default"',
lang: 'painless',
},
},
});
// Index 100 additional documents carrying the `deviation` value of `custom_field`.
// The previous loop iterated over `[...Array(100)]`, whose elements are all
// `undefined`, so `'@version'` was never actually set on these documents.
// A counted loop supplies a real number instead. Numbering runs 1..100, which
// presumably matches the intent given that the next document indexed uses 101
// (NOTE(review): confirm the expected test results do not depend on `@version`).
for (let version = 1; version <= 100; version++) {
  // Documents are indexed one at a time, in order, exactly as before.
  await es.index({
    index: 'ft_farequote',
    body: {
      '@timestamp': '2016-02-09T16:19:59.000Z',
      '@version': version,
      airline: 'UAL',
      custom_field: 'deviation',
      responsetime: 10,
      type: 'farequote',
    },
  });
}
await es.index({
index: 'ft_farequote',
body: {
'@timestamp': '2016-02-09T16:19:59.000Z',
'@version': 101,
airline: 'UAL',
custom_field: 'deviation',
responsetime: 10,
type: 'farequote',
},
refresh: 'wait_for',
});
await ml.testResources.setKibanaTimeZoneToUTC();
await ml.securityUI.loginAsMlPowerUser();

View file

@ -13,14 +13,20 @@ export const farequoteDataViewTestData: TestData = {
sourceIndexOrSavedSearch: 'ft_farequote',
brushTargetTimestamp: 1455033600000,
expected: {
totalDocCountFormatted: '86,274',
analysisGroupsTable: [{ group: 'airline: AAL', docCount: '297' }],
totalDocCountFormatted: '86,375',
analysisGroupsTable: [
{ docCount: '297', group: 'airline: AAL' },
{
docCount: '101',
group: 'airline: UALcustom_field.keyword: deviation',
},
],
analysisTable: [
{
fieldName: 'airline',
fieldValue: 'AAL',
logRate: 'Chart type:bar chart',
pValue: '1.26e-13',
pValue: '5.00e-11',
impact: 'High',
},
],

View file

@ -99,6 +99,26 @@ export function ExplainLogRateSpikesProvider({ getService }: FtrProviderContext)
});
},
/**
 * Asserts that the grouping switch is present in the requested state,
 * retrying for up to 5 seconds.
 *
 * @param checked - when true, looks for the test subject with the ` checked`
 *   suffix; otherwise looks for the plain switch test subject.
 */
async assertSpikeAnalysisGroupSwitchExists(checked: boolean) {
  // Build the test subject once; it does not change between retries.
  const stateSuffix = checked ? ' checked' : '';
  const switchSubject = `aiopsExplainLogRateSpikesGroupSwitch${stateSuffix}`;

  await retry.tryForTime(5000, async () => {
    await testSubjects.existOrFail(switchSubject);
  });
},
/**
 * Clicks the grouping switch and waits (up to 30 seconds) for the switch
 * test subject of the opposite state to exist.
 *
 * @param checked - the state the switch is expected to be in before the click;
 *   selects which test subject is clicked, and the inverse is awaited afterwards.
 */
async clickSpikeAnalysisGroupSwitch(checked: boolean) {
  // Maps a switch state to its test subject (` checked` suffix when on).
  const subjectFor = (isChecked: boolean) =>
    `aiopsExplainLogRateSpikesGroupSwitch${isChecked ? ' checked' : ''}`;
  const toggleTimeoutMs = 30 * 1000;

  await testSubjects.clickWhenNotDisabledWithoutRetry(subjectFor(checked));

  // After the click the switch should report the opposite state.
  await retry.tryForTime(toggleTimeoutMs, async () => {
    await testSubjects.existOrFail(subjectFor(!checked));
  });
},
async assertRerunAnalysisButtonExists(shouldRerun: boolean) {
await testSubjects.existOrFail(
`aiopsRerunAnalysisButton${shouldRerun ? ' shouldRerun' : ''}`