mirror of
https://github.com/elastic/kibana.git
synced 2025-04-24 01:38:56 -04:00
[ML] Explain Log Rate Spikes: Apply random sampling to histogram aggregations. (#144627)
Adds random sampling as an option to `fetchHistogramsForFields()` and to the `fetchAggIntervals()` helper it calls. Since both already supported sampling via the regular sampler aggregation, this also adds a check that throws an error if the two sampling options are enabled at the same time. We might remove support for the regular sampler in a follow-up. The Explain Log Rate Spikes analysis makes use of the new option and applies random sampling to the mini histogram data it fetches.
This commit is contained in:
parent
c53ddb7c85
commit
80bda1a336
7 changed files with 146 additions and 10 deletions
|
@ -0,0 +1,31 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import { buildRandomSamplerAggregation } from './build_random_sampler_aggregation';
|
||||
|
||||
describe('buildRandomSamplerAggregation', () => {
  // Minimal aggregation fixture shared by both test cases.
  const testAggs = {
    bytes_stats: {
      stats: { field: 'bytes' },
    },
  };

  test('returns wrapped random sampler aggregation for probability of 0.01', () => {
    // The supplied aggs are expected to be nested below a `sample` random sampler aggregation.
    const expectedAggs = {
      sample: {
        random_sampler: { probability: 0.01 },
        aggs: testAggs,
      },
    };

    const actualAggs = buildRandomSamplerAggregation(testAggs, 0.01);

    expect(actualAggs).toEqual(expectedAggs);
  });

  test('returns un-sampled aggregation as-is for probability of 1', () => {
    // A probability of 1 means no sampling, so no wrapper is expected.
    const actualAggs = buildRandomSamplerAggregation(testAggs, 1);

    expect(actualAggs).toEqual(testAggs);
  });
});
|
|
@ -0,0 +1,31 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
|
||||
|
||||
/**
 * Nests the supplied aggregations below a `sample` aggregation of type `random_sampler`.
 *
 * @param aggs the aggregations to be sampled
 * @param sampleProbability the `probability` passed on to the random sampler;
 * a value of exactly 1 means "no sampling"
 * @returns the wrapped aggregations, or the unchanged `aggs` when `sampleProbability` is 1
 */
export function buildRandomSamplerAggregation(
  aggs: any,
  sampleProbability: number
): Record<string, estypes.AggregationsAggregationContainer> {
  if (sampleProbability !== 1) {
    const probability = sampleProbability;

    return {
      sample: {
        // @ts-expect-error `random_sampler` is not yet part of `AggregationsAggregationContainer`
        random_sampler: { probability },
        aggs,
      },
    };
  }

  // No sampling requested: hand back the caller's aggregations untouched.
  return aggs;
}
|
|
@ -14,7 +14,9 @@ import { KBN_FIELD_TYPES } from '@kbn/field-types';
|
|||
import { isPopulatedObject } from '@kbn/ml-is-populated-object';
|
||||
import { stringHash } from '@kbn/ml-string-hash';
|
||||
|
||||
import { buildRandomSamplerAggregation } from './build_random_sampler_aggregation';
|
||||
import { buildSamplerAggregation } from './build_sampler_aggregation';
|
||||
import { getRandomSamplerAggregationsResponsePath } from './get_random_sampler_aggregations_response_path';
|
||||
import { getSamplerAggregationsResponsePath } from './get_sampler_aggregations_response_path';
|
||||
import type { HistogramField, NumericColumnStatsMap } from './types';
|
||||
|
||||
|
@ -30,8 +32,17 @@ export const fetchAggIntervals = async (
|
|||
fields: HistogramField[],
|
||||
samplerShardSize: number,
|
||||
runtimeMappings?: estypes.MappingRuntimeFields,
|
||||
abortSignal?: AbortSignal
|
||||
abortSignal?: AbortSignal,
|
||||
randomSamplerProbability?: number
|
||||
): Promise<NumericColumnStatsMap> => {
|
||||
if (
|
||||
samplerShardSize >= 1 &&
|
||||
randomSamplerProbability !== undefined &&
|
||||
randomSamplerProbability < 1
|
||||
) {
|
||||
throw new Error('Sampler and Random Sampler cannot be used at the same time.');
|
||||
}
|
||||
|
||||
const numericColumns = fields.filter((field) => {
|
||||
return field.type === KBN_FIELD_TYPES.NUMBER || field.type === KBN_FIELD_TYPES.DATE;
|
||||
});
|
||||
|
@ -56,7 +67,10 @@ export const fetchAggIntervals = async (
|
|||
size: 0,
|
||||
body: {
|
||||
query,
|
||||
aggs: buildSamplerAggregation(minMaxAggs, samplerShardSize),
|
||||
aggs:
|
||||
randomSamplerProbability === undefined
|
||||
? buildSamplerAggregation(minMaxAggs, samplerShardSize)
|
||||
: buildRandomSamplerAggregation(minMaxAggs, randomSamplerProbability),
|
||||
size: 0,
|
||||
...(isPopulatedObject(runtimeMappings) ? { runtime_mappings: runtimeMappings } : {}),
|
||||
},
|
||||
|
@ -64,7 +78,10 @@ export const fetchAggIntervals = async (
|
|||
{ signal: abortSignal, maxRetries: 0 }
|
||||
);
|
||||
|
||||
const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize);
|
||||
const aggsPath =
|
||||
randomSamplerProbability === undefined
|
||||
? getSamplerAggregationsResponsePath(samplerShardSize)
|
||||
: getRandomSamplerAggregationsResponsePath(randomSamplerProbability);
|
||||
const aggregations = aggsPath.length > 0 ? get(body.aggregations, aggsPath) : body.aggregations;
|
||||
|
||||
return Object.keys(aggregations).reduce((p, aggName) => {
|
||||
|
|
|
@ -14,8 +14,10 @@ import { KBN_FIELD_TYPES } from '@kbn/field-types';
|
|||
import { isPopulatedObject } from '@kbn/ml-is-populated-object';
|
||||
import { stringHash } from '@kbn/ml-string-hash';
|
||||
|
||||
import { buildRandomSamplerAggregation } from './build_random_sampler_aggregation';
|
||||
import { buildSamplerAggregation } from './build_sampler_aggregation';
|
||||
import { fetchAggIntervals } from './fetch_agg_intervals';
|
||||
import { getRandomSamplerAggregationsResponsePath } from './get_random_sampler_aggregations_response_path';
|
||||
import { getSamplerAggregationsResponsePath } from './get_sampler_aggregations_response_path';
|
||||
import type {
|
||||
AggCardinality,
|
||||
|
@ -138,6 +140,7 @@ export type FieldsForHistograms = Array<
|
|||
* @param fields the fields the histograms should be generated for
|
||||
* @param samplerShardSize shard_size parameter of the sampler aggregation
|
||||
* @param runtimeMappings optional runtime mappings
|
||||
* @param randomSamplerProbability optional random sampler probability
|
||||
* @returns an array of histogram data for each supplied field
|
||||
*/
|
||||
export const fetchHistogramsForFields = async (
|
||||
|
@ -147,8 +150,17 @@ export const fetchHistogramsForFields = async (
|
|||
fields: FieldsForHistograms,
|
||||
samplerShardSize: number,
|
||||
runtimeMappings?: estypes.MappingRuntimeFields,
|
||||
abortSignal?: AbortSignal
|
||||
abortSignal?: AbortSignal,
|
||||
randomSamplerProbability?: number
|
||||
) => {
|
||||
if (
|
||||
samplerShardSize >= 1 &&
|
||||
randomSamplerProbability !== undefined &&
|
||||
randomSamplerProbability < 1
|
||||
) {
|
||||
throw new Error('Sampler and Random Sampler cannot be used at the same time.');
|
||||
}
|
||||
|
||||
const aggIntervals = {
|
||||
...(await fetchAggIntervals(
|
||||
client,
|
||||
|
@ -157,7 +169,8 @@ export const fetchHistogramsForFields = async (
|
|||
fields.filter((f) => !isNumericHistogramFieldWithColumnStats(f)),
|
||||
samplerShardSize,
|
||||
runtimeMappings,
|
||||
abortSignal
|
||||
abortSignal,
|
||||
randomSamplerProbability
|
||||
)),
|
||||
...fields.filter(isNumericHistogramFieldWithColumnStats).reduce((p, field) => {
|
||||
const { interval, min, max, fieldName } = field;
|
||||
|
@ -206,7 +219,10 @@ export const fetchHistogramsForFields = async (
|
|||
size: 0,
|
||||
body: {
|
||||
query,
|
||||
aggs: buildSamplerAggregation(chartDataAggs, samplerShardSize),
|
||||
aggs:
|
||||
randomSamplerProbability === undefined
|
||||
? buildSamplerAggregation(chartDataAggs, samplerShardSize)
|
||||
: buildRandomSamplerAggregation(chartDataAggs, randomSamplerProbability),
|
||||
size: 0,
|
||||
...(isPopulatedObject(runtimeMappings) ? { runtime_mappings: runtimeMappings } : {}),
|
||||
},
|
||||
|
@ -214,7 +230,10 @@ export const fetchHistogramsForFields = async (
|
|||
{ signal: abortSignal, maxRetries: 0 }
|
||||
);
|
||||
|
||||
const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize);
|
||||
const aggsPath =
|
||||
randomSamplerProbability === undefined
|
||||
? getSamplerAggregationsResponsePath(samplerShardSize)
|
||||
: getRandomSamplerAggregationsResponsePath(randomSamplerProbability);
|
||||
const aggregations = aggsPath.length > 0 ? get(body.aggregations, aggsPath) : body.aggregations;
|
||||
|
||||
return fields.map((field) => {
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import { getRandomSamplerAggregationsResponsePath } from './get_random_sampler_aggregations_response_path';
|
||||
|
||||
describe('getRandomSamplerAggregationsResponsePath', () => {
  test('returns correct path for random sampler probability of 0.01', () => {
    // With sampling active, the results are expected below the `sample` key.
    const responsePath = getRandomSamplerAggregationsResponsePath(0.01);

    expect(responsePath).toEqual(['sample']);
  });

  test('returns correct path for random sampler probability of 1', () => {
    // A probability of 1 means no sampling, so the path is expected to be empty.
    const responsePath = getRandomSamplerAggregationsResponsePath(1);

    expect(responsePath).toEqual([]);
  });
});
|
|
@ -0,0 +1,17 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
// Returns the path of aggregations in the elasticsearch response, as an array,
// depending on whether random sampling is being used.
// A supplied randomSamplerProbability
// (the probability parameter of the random sampler aggregation)
// of exactly 1 indicates no random sampling, and an empty array is returned.
// For every other value `['sample']` is returned. This deliberately mirrors
// `buildRandomSamplerAggregation`, which only skips the `sample` wrapper when
// the probability is exactly 1, so the path always matches the shape of the
// request that was built.
export function getRandomSamplerAggregationsResponsePath(
  randomSamplerProbability: number
): string[] {
  return randomSamplerProbability === 1 ? [] : ['sample'];
}
|
|
@ -337,7 +337,8 @@ export const defineExplainLogRateSpikesRoute = (
|
|||
// samplerShardSize
|
||||
-1,
|
||||
undefined,
|
||||
abortSignal
|
||||
abortSignal,
|
||||
sampleProbability
|
||||
)) as [NumericChartData]
|
||||
)[0];
|
||||
} catch (e) {
|
||||
|
@ -609,7 +610,8 @@ export const defineExplainLogRateSpikesRoute = (
|
|||
// samplerShardSize
|
||||
-1,
|
||||
undefined,
|
||||
abortSignal
|
||||
abortSignal,
|
||||
sampleProbability
|
||||
)) as [NumericChartData]
|
||||
)[0];
|
||||
} catch (e) {
|
||||
|
@ -704,7 +706,8 @@ export const defineExplainLogRateSpikesRoute = (
|
|||
// samplerShardSize
|
||||
-1,
|
||||
undefined,
|
||||
abortSignal
|
||||
abortSignal,
|
||||
sampleProbability
|
||||
)) as [NumericChartData]
|
||||
)[0];
|
||||
} catch (e) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue