[ML] Explain Log Rate Spikes: Limit fields for frequent_items agg. (#143974)

Limits the fields we pass on to the frequent_items aggregation to 15. This is a trade-off between speed and quality of the grouping result. The time it takes to get frequent items grows more than linearly with the number of fields we pass on to the agg, and we risk timeouts with more fields.
This commit is contained in:
Walter Rafelsberger 2022-10-27 14:35:33 +02:00 committed by GitHub
parent a5411c5e89
commit 0092f0ac53
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -13,6 +13,8 @@ import type { ElasticsearchClient } from '@kbn/core-elasticsearch-server';
import type { Logger } from '@kbn/logging';
import type { ChangePoint, FieldValuePair } from '@kbn/ml-agg-utils';
// Maximum number of unique field names passed on to the frequent_items
// aggregation. This is a trade-off between speed and quality of the grouping
// result: the time the aggregation takes grows more than linearly with the
// number of fields, so more fields risk timeouts.
const FREQUENT_ITEMS_FIELDS_LIMIT = 15;
interface FrequentItemsAggregation extends estypes.AggregationsSamplerAggregation {
fi: {
buckets: Array<{ key: Record<string, string[]>; doc_count: number; support: number }>;
@ -59,10 +61,19 @@ export async function fetchFrequentItems(
emitError: (m: string) => void,
abortSignal?: AbortSignal
) {
// get unique fields from change points
const fields = [...new Set(changePoints.map((t) => t.fieldName))];
// Sort change points by ascending p-value, necessary to apply the field limit correctly.
const sortedChangePoints = changePoints.slice().sort((a, b) => {
return (a.pValue ?? 0) - (b.pValue ?? 0);
});
// Get up to 15 unique fields from change points with retained order
const fields = sortedChangePoints.reduce<string[]>((p, c) => {
if (p.length < FREQUENT_ITEMS_FIELDS_LIMIT && !p.some((d) => d === c.fieldName)) {
p.push(c.fieldName);
}
return p;
}, []);
// TODO add query params
const query = {
bool: {
minimum_should_match: 2,
@ -77,7 +88,7 @@ export async function fetchFrequentItems(
},
},
],
should: changePoints.map((t) => {
should: sortedChangePoints.map((t) => {
return { term: { [t.fieldName]: t.fieldValue } };
}),
},
@ -117,16 +128,18 @@ export async function fetchFrequentItems(
},
};
const esBody = {
query,
aggs,
size: 0,
track_total_hits: true,
};
const body = await client.search<unknown, { sample: FrequentItemsAggregation }>(
{
index,
size: 0,
body: {
query,
aggs,
size: 0,
track_total_hits: true,
},
body: esBody,
},
{ signal: abortSignal, maxRetries: 0 }
);
@ -167,7 +180,7 @@ export async function fetchFrequentItems(
Object.entries(fis.key).forEach(([key, value]) => {
result.set[key] = value[0];
const pValue = changePoints.find(
const pValue = sortedChangePoints.find(
(t) => t.fieldName === key && t.fieldValue === value[0]
)?.pValue;