[ML] Fine-tune event distribution chart data fetching. (#31179) (#31253)

* Reduces `shard_size` for the `sampler` aggregation from `50000` to `20000`.
* Sets `precision_threshold` for `cardinality` aggregations to `100`.
* Fixes a bug where `distinct_count` was used instead of `cardinality` in the check for whether to apply normalization.
* Fixes normalization for `7.x` and above by adding `rest_total_hits_as_int: true`.
* Tweaks the text for the chart's info tooltip.
This commit is contained in:
Walter Rafelsberger 2019-02-15 21:00:03 +01:00 committed by GitHub
parent 638092fdb0
commit f08bcff22e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 13 additions and 7 deletions

View file

@ -15,12 +15,12 @@ import { injectI18n } from '@kbn/i18n/react';
const CHART_DESCRIPTION = {
[CHART_TYPE.EVENT_DISTRIBUTION]: i18n.translate('xpack.ml.explorer.charts.infoTooltip.chartEventDistributionDescription', {
defaultMessage: 'The gray dots depict the distribution of occurrences over time for a sample of {byFieldValuesParam} with' +
defaultMessage: 'The gray dots depict the approximate distribution of occurrences over time for a sample of {byFieldValuesParam} with' +
' more frequent event types at the top and rarer ones at the bottom.',
values: { byFieldValuesParam: 'by_field_values' }
}),
[CHART_TYPE.POPULATION_DISTRIBUTION]: i18n.translate('xpack.ml.explorer.charts.infoTooltip.chartPopulationDistributionDescription', {
defaultMessage: 'The gray dots depict the distribution of values over time for a sample of {overFieldValuesParam}.',
defaultMessage: 'The gray dots depict the approximate distribution of values over time for a sample of {overFieldValuesParam}.',
values: { overFieldValuesParam: 'over_field_values' }
}),
};

View file

@ -1177,7 +1177,7 @@ function getMetricData(
types,
entityFields,
query,
metricFunction,
metricFunction, // ES aggregation name
metricFieldName,
timeFieldName,
earliestMs,
@ -1408,16 +1408,17 @@ function getEventRateData(
// Extra query object can be supplied, or pass null if no additional query.
// Returned response contains a results property, which is an object
// of document counts against time (epoch millis).
const SAMPLER_TOP_TERMS_SHARD_SIZE = 50000;
const SAMPLER_TOP_TERMS_SHARD_SIZE = 20000;
const ENTITY_AGGREGATION_SIZE = 10;
const AGGREGATION_MIN_DOC_COUNT = 1;
const CARDINALITY_PRECISION_THRESHOLD = 100;
function getEventDistributionData(
index,
types,
splitField,
filterField = null,
query,
metricFunction,
metricFunction, // ES aggregation name
metricFieldName,
timeFieldName,
earliestMs,
@ -1524,12 +1525,17 @@ function getEventDistributionData(
if (metricFunction === 'percentiles') {
metricAgg[metricFunction].percents = [ML_MEDIAN_PERCENTS];
}
if (metricFunction === 'cardinality') {
metricAgg[metricFunction].precision_threshold = CARDINALITY_PRECISION_THRESHOLD;
}
body.aggs.sample.aggs.byTime.aggs.entities.aggs.metric = metricAgg;
}
ml.esSearch({
index,
body
body,
rest_total_hits_as_int: true,
})
.then((resp) => {
// Because of the sampling, results of metricFunctions which use sum or count
@ -1552,7 +1558,7 @@ function getEventDistributionData(
if (
metricFunction === 'count'
|| metricFunction === 'distinct_count'
|| metricFunction === 'cardinality'
|| metricFunction === 'sum'
) {
value = value * normalizeFactor;