mirror of
https://github.com/elastic/kibana.git
synced 2025-04-24 01:38:56 -04:00
[ML] Explain Log Rate Spikes: Limit fields for frequent_items agg. (#143974)
Limits the fields we pass on to the frequent_items aggregation to 15. This is a trade-off between speed and quality of the grouping result. The time it takes to get frequent items grows more than linearly with the number of fields we pass on to the agg, so we risk timeouts with more fields.
This commit is contained in:
parent
a5411c5e89
commit
0092f0ac53
1 changed files with 24 additions and 11 deletions
|
@ -13,6 +13,8 @@ import type { ElasticsearchClient } from '@kbn/core-elasticsearch-server';
|
|||
import type { Logger } from '@kbn/logging';
|
||||
import type { ChangePoint, FieldValuePair } from '@kbn/ml-agg-utils';
|
||||
|
||||
const FREQUENT_ITEMS_FIELDS_LIMIT = 15;
|
||||
|
||||
interface FrequentItemsAggregation extends estypes.AggregationsSamplerAggregation {
|
||||
fi: {
|
||||
buckets: Array<{ key: Record<string, string[]>; doc_count: number; support: number }>;
|
||||
|
@ -59,10 +61,19 @@ export async function fetchFrequentItems(
|
|||
emitError: (m: string) => void,
|
||||
abortSignal?: AbortSignal
|
||||
) {
|
||||
// get unique fields from change points
|
||||
const fields = [...new Set(changePoints.map((t) => t.fieldName))];
|
||||
// Sort change points by ascending p-value, necessary to apply the field limit correctly.
|
||||
const sortedChangePoints = changePoints.slice().sort((a, b) => {
|
||||
return (a.pValue ?? 0) - (b.pValue ?? 0);
|
||||
});
|
||||
|
||||
// Get up to 15 unique fields from change points with retained order
|
||||
const fields = sortedChangePoints.reduce<string[]>((p, c) => {
|
||||
if (p.length < FREQUENT_ITEMS_FIELDS_LIMIT && !p.some((d) => d === c.fieldName)) {
|
||||
p.push(c.fieldName);
|
||||
}
|
||||
return p;
|
||||
}, []);
|
||||
|
||||
// TODO add query params
|
||||
const query = {
|
||||
bool: {
|
||||
minimum_should_match: 2,
|
||||
|
@ -77,7 +88,7 @@ export async function fetchFrequentItems(
|
|||
},
|
||||
},
|
||||
],
|
||||
should: changePoints.map((t) => {
|
||||
should: sortedChangePoints.map((t) => {
|
||||
return { term: { [t.fieldName]: t.fieldValue } };
|
||||
}),
|
||||
},
|
||||
|
@ -117,16 +128,18 @@ export async function fetchFrequentItems(
|
|||
},
|
||||
};
|
||||
|
||||
const esBody = {
|
||||
query,
|
||||
aggs,
|
||||
size: 0,
|
||||
track_total_hits: true,
|
||||
};
|
||||
|
||||
const body = await client.search<unknown, { sample: FrequentItemsAggregation }>(
|
||||
{
|
||||
index,
|
||||
size: 0,
|
||||
body: {
|
||||
query,
|
||||
aggs,
|
||||
size: 0,
|
||||
track_total_hits: true,
|
||||
},
|
||||
body: esBody,
|
||||
},
|
||||
{ signal: abortSignal, maxRetries: 0 }
|
||||
);
|
||||
|
@ -167,7 +180,7 @@ export async function fetchFrequentItems(
|
|||
Object.entries(fis.key).forEach(([key, value]) => {
|
||||
result.set[key] = value[0];
|
||||
|
||||
const pValue = changePoints.find(
|
||||
const pValue = sortedChangePoints.find(
|
||||
(t) => t.fieldName === key && t.fieldValue === value[0]
|
||||
)?.pValue;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue