[8.6] [ML] Explain Log Rate Spikes: Fix mini histograms for groups with multiple values per field. (#147597) (#147865)

# Backport

This will backport the following commits from `main` to `8.6`:
- [[ML] Explain Log Rate Spikes: Fix mini histograms for groups with
multiple values per field.
(#147597)](https://github.com/elastic/kibana/pull/147597)

<!--- Backport version: 8.9.7 -->

### Questions ?
Please refer to the [Backport tool
documentation](https://github.com/sqren/backport)

<!--BACKPORT [{"author":{"name":"Walter
Rafelsberger","email":"walter.rafelsberger@elastic.co"},"sourceCommit":{"committedDate":"2022-12-20T15:31:12Z","message":"[ML]
Explain Log Rate Spikes: Fix mini histograms for groups with multiples
values per field. (#147597)\n\nFor groups that have multiple values for
the same field, the group\r\nhistogram query wasn't able to fetch data
because it filters with a bool\r\n`must` and individual `term`
aggregations. This PR fixes it by using a\r\n`terms` aggregation if
there are multiple values for a field so just\r\nthese get treated as
`OR`.","sha":"3a48b7a84e587f0d5b9bffef0abecbce9c62e2e6","branchLabelMapping":{"^v8.7.0$":"main","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["bug","release_note:fix",":ml","Feature:ML/AIOps","v8.6.0","v8.7.0"],"number":147597,"url":"https://github.com/elastic/kibana/pull/147597","mergeCommit":{"message":"[ML]
Explain Log Rate Spikes: Fix mini histograms for groups with multiples
values per field. (#147597)\n\nFor groups that have multiple values for
the same field, the group\r\nhistogram query wasn't able to fetch data
because it filters with a bool\r\n`must` and individual `term`
aggregations. This PR fixes it by using a\r\n`terms` aggregation if
there are multiple values for a field so just\r\nthese get treated as
`OR`.","sha":"3a48b7a84e587f0d5b9bffef0abecbce9c62e2e6"}},"sourceBranch":"main","suggestedTargetBranches":["8.6"],"targetPullRequestStates":[{"branch":"8.6","label":"v8.6.0","labelRegex":"^v(\\d+).(\\d+).\\d+$","isSourceBranch":false,"state":"NOT_CREATED"},{"branch":"main","label":"v8.7.0","labelRegex":"^v8.7.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/147597","number":147597,"mergeCommit":{"message":"[ML]
Explain Log Rate Spikes: Fix mini histograms for groups with multiples
values per field. (#147597)\n\nFor groups that have multiple values for
the same field, the group\r\nhistogram query wasn't able to fetch data
because it filters with a bool\r\n`must` and individual `term`
aggregations. This PR fixes it by using a\r\n`terms` aggregation if
there are multiple values for a field so just\r\nthese get treated as
`OR`.","sha":"3a48b7a84e587f0d5b9bffef0abecbce9c62e2e6"}}]}] BACKPORT-->

Co-authored-by: Walter Rafelsberger <walter.rafelsberger@elastic.co>
This commit is contained in:
Kibana Machine 2022-12-20 11:41:16 -05:00 committed by GitHub
parent a6a51f9216
commit 35ca3798b9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 89 additions and 6 deletions

View file

@ -58,6 +58,7 @@ import {
getSimpleHierarchicalTreeLeaves,
markDuplicates,
} from './queries/get_simple_hierarchical_tree';
import { getGroupFilter } from './queries/get_group_filter';
// 10s ping frequency to keep the stream alive.
const PING_FREQUENCY = 10000;
@ -639,12 +640,7 @@ export const defineExplainLogRateSpikesRoute = (
}
if (overallTimeSeries !== undefined) {
const histogramQuery = getHistogramQuery(
request.body,
cpg.group.map((d) => ({
term: { [d.fieldName]: d.fieldValue },
}))
);
const histogramQuery = getHistogramQuery(request.body, getGroupFilter(cpg));
let cpgTimeSeries: NumericChartData;
try {

View file

@ -0,0 +1,54 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { getGroupFilter } from './get_group_filter';
// Fixture: a group in which the `url` field occurs with two different values
// next to a single-valued `response_code` field.
const multiValueGroup = {
  id: '2038579476',
  group: [
    { fieldName: 'response_code', fieldValue: '500', duplicate: false },
    { fieldName: 'url', fieldValue: 'home.php', duplicate: false },
    { fieldName: 'url', fieldValue: 'login.php', duplicate: false },
  ],
  docCount: 792,
  pValue: 0.010770456205312423,
};

// Fixture: a group made up of exactly one field/value pair.
const singleValueGroup = {
  id: '817080373',
  group: [{ fieldName: 'user', fieldValue: 'Peter', duplicate: false }],
  docCount: 1981,
  pValue: 2.7454255728359757e-21,
};

// Index 0 is the multi-value group, index 1 the single-value group.
const changePointGroups = [multiValueGroup, singleValueGroup];
describe('getGroupFilter', () => {
  it('gets a query filter for the change points of a group with multiple values per field', () => {
    // `response_code` has one value and stays a `term` filter, while the two
    // `url` values are expected to be merged into a single `terms` filter.
    const expectedFilter = [
      { term: { response_code: '500' } },
      { terms: { url: ['home.php', 'login.php'] } },
    ];

    const actualFilter = getGroupFilter(changePointGroups[0]);

    expect(actualFilter).toStrictEqual(expectedFilter);
  });

  it('gets a query filter for the change points of a group with just a single field/value', () => {
    // A lone field/value pair is expected to produce exactly one `term` filter.
    const expectedFilter = [{ term: { user: 'Peter' } }];

    const actualFilter = getGroupFilter(changePointGroups[1]);

    expect(actualFilter).toStrictEqual(expectedFilter);
  });
});

View file

@ -0,0 +1,33 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import type { ChangePointGroup } from '@kbn/ml-agg-utils';
/**
 * Transforms the list of change point items of a group into a list of query filters.
 *
 * - A field that occurs with exactly one value yields a `term` filter.
 * - A field that occurs with several values yields a single `terms` filter that
 *   includes all of those values. Separate `term` filters for different values
 *   of the same field would rule each other out, so the query would not return
 *   any results.
 *
 * The filters are returned in the order in which each field name first appears
 * in the group.
 *
 * @param changePointGroup - The group whose `group` items (`fieldName` /
 * `fieldValue` pairs) are converted.
 * @returns One filter per distinct field name; an empty array for an empty group.
 */
export function getGroupFilter(
  changePointGroup: ChangePointGroup
): estypes.QueryDslQueryContainer[] {
  // A Map is used instead of a plain object so that field names which match
  // `Object.prototype` members (e.g. `constructor`, `toString`, `__proto__`)
  // are treated like any other key instead of resolving to an inherited value,
  // and so that integer-like field names keep their first-seen position.
  const valuesByField = new Map<string, Array<string | number>>();

  for (const { fieldName, fieldValue } of changePointGroup.group) {
    const values = valuesByField.get(fieldName);
    if (values === undefined) {
      valuesByField.set(fieldName, [fieldValue]);
    } else {
      values.push(fieldValue);
    }
  }

  return Array.from(
    valuesByField,
    ([fieldName, values]): estypes.QueryDslQueryContainer =>
      values.length > 1 ? { terms: { [fieldName]: values } } : { term: { [fieldName]: values[0] } }
  );
}