[ML] Explain Log Rate Spikes: Apply random sampling to histogram aggregations. (#144627)

Adds random sampling as an option to `fetchHistogramsForFields()`. Since this function already supported normal sampling, this also adds a check that throws if both options are used at the same time. We might remove support for normal sampling in a follow-up. The Explain Log Rate Spikes analysis makes use of the new option and applies random sampling to the mini histogram data it fetches.
This commit is contained in:
Walter Rafelsberger 2022-11-08 17:25:36 +01:00 committed by GitHub
parent c53ddb7c85
commit 80bda1a336
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 146 additions and 10 deletions

View file

@ -0,0 +1,31 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { buildRandomSamplerAggregation } from './build_random_sampler_aggregation';
describe('buildRandomSamplerAggregation', () => {
  // Minimal aggregation payload shared by both cases below.
  const testAggs = {
    bytes_stats: {
      stats: { field: 'bytes' },
    },
  };

  test('returns wrapped random sampler aggregation for probability of 0.01', () => {
    // A probability other than 1 nests the aggs under a `sample` random sampler aggregation.
    const probability = 0.01;
    const expectedAggs = {
      sample: {
        random_sampler: { probability },
        aggs: testAggs,
      },
    };

    const actualAggs = buildRandomSamplerAggregation(testAggs, probability);

    expect(actualAggs).toEqual(expectedAggs);
  });

  test('returns un-sampled aggregation as-is for probability of 1', () => {
    // A probability of 1 means "no sampling": the aggs come back without a wrapper.
    const actualAggs = buildRandomSamplerAggregation(testAggs, 1);

    expect(actualAggs).toEqual(testAggs);
  });
});

View file

@ -0,0 +1,31 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
/**
 * Nests the given aggregations under a `sample` aggregation that uses
 * Elasticsearch's `random_sampler`.
 *
 * @param aggs the aggregations to wrap
 * @param sampleProbability the `probability` passed to `random_sampler`;
 * a value of exactly 1 means "no sampling"
 * @returns the wrapped aggregations, or `aggs` itself (same reference)
 * when `sampleProbability` is exactly 1
 */
export function buildRandomSamplerAggregation(
  aggs: any,
  sampleProbability: number
): Record<string, estypes.AggregationsAggregationContainer> {
  if (sampleProbability !== 1) {
    const samplerOptions = { probability: sampleProbability };

    return {
      sample: {
        // @ts-expect-error `random_sampler` is not yet part of `AggregationsAggregationContainer`
        random_sampler: samplerOptions,
        aggs,
      },
    };
  }

  // No sampling requested: hand the aggregations back untouched.
  return aggs;
}

View file

@ -14,7 +14,9 @@ import { KBN_FIELD_TYPES } from '@kbn/field-types';
import { isPopulatedObject } from '@kbn/ml-is-populated-object';
import { stringHash } from '@kbn/ml-string-hash';
import { buildRandomSamplerAggregation } from './build_random_sampler_aggregation';
import { buildSamplerAggregation } from './build_sampler_aggregation';
import { getRandomSamplerAggregationsResponsePath } from './get_random_sampler_aggregations_response_path';
import { getSamplerAggregationsResponsePath } from './get_sampler_aggregations_response_path';
import type { HistogramField, NumericColumnStatsMap } from './types';
@ -30,8 +32,17 @@ export const fetchAggIntervals = async (
fields: HistogramField[],
samplerShardSize: number,
runtimeMappings?: estypes.MappingRuntimeFields,
abortSignal?: AbortSignal
abortSignal?: AbortSignal,
randomSamplerProbability?: number
): Promise<NumericColumnStatsMap> => {
if (
samplerShardSize >= 1 &&
randomSamplerProbability !== undefined &&
randomSamplerProbability < 1
) {
throw new Error('Sampler and Random Sampler cannot be used at the same time.');
}
const numericColumns = fields.filter((field) => {
return field.type === KBN_FIELD_TYPES.NUMBER || field.type === KBN_FIELD_TYPES.DATE;
});
@ -56,7 +67,10 @@ export const fetchAggIntervals = async (
size: 0,
body: {
query,
aggs: buildSamplerAggregation(minMaxAggs, samplerShardSize),
aggs:
randomSamplerProbability === undefined
? buildSamplerAggregation(minMaxAggs, samplerShardSize)
: buildRandomSamplerAggregation(minMaxAggs, randomSamplerProbability),
size: 0,
...(isPopulatedObject(runtimeMappings) ? { runtime_mappings: runtimeMappings } : {}),
},
@ -64,7 +78,10 @@ export const fetchAggIntervals = async (
{ signal: abortSignal, maxRetries: 0 }
);
const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize);
const aggsPath =
randomSamplerProbability === undefined
? getSamplerAggregationsResponsePath(samplerShardSize)
: getRandomSamplerAggregationsResponsePath(randomSamplerProbability);
const aggregations = aggsPath.length > 0 ? get(body.aggregations, aggsPath) : body.aggregations;
return Object.keys(aggregations).reduce((p, aggName) => {

View file

@ -14,8 +14,10 @@ import { KBN_FIELD_TYPES } from '@kbn/field-types';
import { isPopulatedObject } from '@kbn/ml-is-populated-object';
import { stringHash } from '@kbn/ml-string-hash';
import { buildRandomSamplerAggregation } from './build_random_sampler_aggregation';
import { buildSamplerAggregation } from './build_sampler_aggregation';
import { fetchAggIntervals } from './fetch_agg_intervals';
import { getRandomSamplerAggregationsResponsePath } from './get_random_sampler_aggregations_response_path';
import { getSamplerAggregationsResponsePath } from './get_sampler_aggregations_response_path';
import type {
AggCardinality,
@ -138,6 +140,7 @@ export type FieldsForHistograms = Array<
* @param fields the fields the histograms should be generated for
* @param samplerShardSize shard_size parameter of the sampler aggregation
* @param runtimeMappings optional runtime mappings
* @param randomSamplerProbability optional random sampler probability
* @returns an array of histogram data for each supplied field
*/
export const fetchHistogramsForFields = async (
@ -147,8 +150,17 @@ export const fetchHistogramsForFields = async (
fields: FieldsForHistograms,
samplerShardSize: number,
runtimeMappings?: estypes.MappingRuntimeFields,
abortSignal?: AbortSignal
abortSignal?: AbortSignal,
randomSamplerProbability?: number
) => {
if (
samplerShardSize >= 1 &&
randomSamplerProbability !== undefined &&
randomSamplerProbability < 1
) {
throw new Error('Sampler and Random Sampler cannot be used at the same time.');
}
const aggIntervals = {
...(await fetchAggIntervals(
client,
@ -157,7 +169,8 @@ export const fetchHistogramsForFields = async (
fields.filter((f) => !isNumericHistogramFieldWithColumnStats(f)),
samplerShardSize,
runtimeMappings,
abortSignal
abortSignal,
randomSamplerProbability
)),
...fields.filter(isNumericHistogramFieldWithColumnStats).reduce((p, field) => {
const { interval, min, max, fieldName } = field;
@ -206,7 +219,10 @@ export const fetchHistogramsForFields = async (
size: 0,
body: {
query,
aggs: buildSamplerAggregation(chartDataAggs, samplerShardSize),
aggs:
randomSamplerProbability === undefined
? buildSamplerAggregation(chartDataAggs, samplerShardSize)
: buildRandomSamplerAggregation(chartDataAggs, randomSamplerProbability),
size: 0,
...(isPopulatedObject(runtimeMappings) ? { runtime_mappings: runtimeMappings } : {}),
},
@ -214,7 +230,10 @@ export const fetchHistogramsForFields = async (
{ signal: abortSignal, maxRetries: 0 }
);
const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize);
const aggsPath =
randomSamplerProbability === undefined
? getSamplerAggregationsResponsePath(samplerShardSize)
: getRandomSamplerAggregationsResponsePath(randomSamplerProbability);
const aggregations = aggsPath.length > 0 ? get(body.aggregations, aggsPath) : body.aggregations;
return fields.map((field) => {

View file

@ -0,0 +1,18 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { getRandomSamplerAggregationsResponsePath } from './get_random_sampler_aggregations_response_path';
describe('getRandomSamplerAggregationsResponsePath', () => {
  test('returns correct path for random sampler probability of 0.01', () => {
    // With sampling active, the aggregations are nested under `sample` in the response.
    const responsePath = getRandomSamplerAggregationsResponsePath(0.01);

    expect(responsePath).toEqual(['sample']);
  });

  test('returns correct path for random sampler probability of 1', () => {
    // A probability of 1 means no sampling, so there is no nesting level to traverse.
    const responsePath = getRandomSamplerAggregationsResponsePath(1);

    expect(responsePath).toEqual([]);
  });
});

View file

@ -0,0 +1,17 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
// Returns the path of aggregations in the elasticsearch response, as an array,
// depending on whether random sampling is being used.
// A supplied randomSamplerProbability
// (the probability parameter of the random sampler aggregation)
// of exactly 1 indicates no random sampling, and an empty array is returned.
// Every other value yields ['sample']. This mirrors `buildRandomSamplerAggregation()`,
// which wraps the request aggregations in a `sample` aggregation unless the
// probability is exactly 1, so the response path always matches the request shape.
export function getRandomSamplerAggregationsResponsePath(
  randomSamplerProbability: number
): string[] {
  return randomSamplerProbability === 1 ? [] : ['sample'];
}

View file

@ -337,7 +337,8 @@ export const defineExplainLogRateSpikesRoute = (
// samplerShardSize
-1,
undefined,
abortSignal
abortSignal,
sampleProbability
)) as [NumericChartData]
)[0];
} catch (e) {
@ -609,7 +610,8 @@ export const defineExplainLogRateSpikesRoute = (
// samplerShardSize
-1,
undefined,
abortSignal
abortSignal,
sampleProbability
)) as [NumericChartData]
)[0];
} catch (e) {
@ -704,7 +706,8 @@ export const defineExplainLogRateSpikesRoute = (
// samplerShardSize
-1,
undefined,
abortSignal
abortSignal,
sampleProbability
)) as [NumericChartData]
)[0];
} catch (e) {