[ML] Improves bucket span estimator stability. (#21282)

- Fixes the bucket span estimator when median is selected as a detector function. agg.type.name is median and therefore not usable for an Elasticsearch aggregation; agg.type.dslName is percentile and is the correct mapping. .dslName is also used for the aggregations behind the preview charts.
- 7.0 will introduce a search.max_buckets setting which defaults to 10000. This could cause bucket span estimations to fail, because the values used to create the required aggregations could result in more buckets than that limit. This PR fixes it by taking search.max_buckets into account when calculating the time range used for the bucket span estimation. (The setting has been available since 6.2, so this is backported to the current unreleased minor releases 6.4 and 6.5.)
2025-04-24 01:38:56 -04:00 · 2018-07-27 10:43:59 +02:00 · 2018-07-27 10:43:59 +02:00 · 3b6c9e3195
commit 3b6c9e3195
parent 256954259b
3 changed files with 58 additions and 33 deletions
--- a/x-pack/plugins/ml/public/jobs/new_job/simple/components/bucket_span_estimator/bucket_span_estimator_directive.js
+++ b/x-pack/plugins/ml/public/jobs/new_job/simple/components/bucket_span_estimator/bucket_span_estimator_directive.js
@ -66,14 +66,14 @@ module.directive('mlBucketSpanEstimator', function () {
          // single metric config
          const fieldName = ($scope.formConfig.field === null) ? null : $scope.formConfig.field.name;
          data.fields.push(fieldName);
-          data.aggTypes.push($scope.formConfig.agg.type.name);
+          data.aggTypes.push($scope.formConfig.agg.type.dslName);
        } else {
          // multi metric config
          Object.keys($scope.formConfig.fields).map((id) => {
            const field = $scope.formConfig.fields[id];
            const fieldName = (field.id === EVENT_RATE_COUNT_FIELD) ? null : field.name;
            data.fields.push(fieldName);
-            data.aggTypes.push(field.agg.type.name);
+            data.aggTypes.push(field.agg.type.dslName);
          });
        }

--- a/x-pack/plugins/ml/server/models/bucket_span_estimator/tests/bucket_span_estimator.js
+++ b/x-pack/plugins/ml/server/models/bucket_span_estimator/tests/bucket_span_estimator.js
@ -40,7 +40,7 @@ describe('ML - BucketSpanEstimator', () => {
          }
        }
      }).catch((catchData) => {
-        expect(catchData).to.be('BucketSpanEstimator: run has stopped because no checks returned a valid interval');
+        expect(catchData).to.be('Unable to retrieve cluster setting search.max_buckets');
        done();
      });

--- a/x-pack/plugins/ml/server/models/bucket_span_estimator/bucket_span_estimator.js
+++ b/x-pack/plugins/ml/server/models/bucket_span_estimator/bucket_span_estimator.js
@ -17,7 +17,7 @@ export function estimateBucketSpanFactory(callWithRequest) {
  const SingleSeriesChecker = singleSeriesCheckerFactory(callWithRequest);

  class BucketSpanEstimator {
-    constructor({ index, timeField, aggTypes, fields, duration, query, splitField }, splitFieldValues) {
+    constructor({ index, timeField, aggTypes, fields, duration, query, splitField }, splitFieldValues, maxBuckets) {
      this.index = index;
      this.timeField = timeField;
      this.aggTypes = aggTypes;
@ -32,9 +32,15 @@ export function estimateBucketSpanFactory(callWithRequest) {
        minimumBucketSpanMS: 0
      };

-      // only run the tests over the last 250 hours of data
+      // determine durations for bucket span estimation
+      // taking into account the clusters' search.max_buckets settings
+      // the polled_data_checker uses an aggregation interval of 1 minute
+      // so that's the smallest interval we have to check for not to
+      // exceed search.max_buckets.
+      const ONE_MINUTE_MS = 60000;
      const ONE_HOUR_MS = 3600000;
-      const HOUR_MULTIPLIER = 250;
+      // only run the tests over the last 250 hours of data at max
+      const HOUR_MULTIPLIER = Math.min(250, Math.floor((maxBuckets * ONE_MINUTE_MS) / ONE_HOUR_MS));
      const timePickerDurationLength = (this.duration.end - this.duration.start);
      const multiplierDurationLength = (ONE_HOUR_MS * HOUR_MULTIPLIER);

@ -315,35 +321,54 @@ export function estimateBucketSpanFactory(callWithRequest) {
    }

    return new Promise((resolve, reject) => {
-      const runEstimator = (splitFieldValues = []) => {
-        const bucketSpanEstimator = new BucketSpanEstimator(
-          formConfig,
-          splitFieldValues
-        );
+      // fetch the `search.max_buckets` cluster setting so we're able to
+      // adjust aggregations to not exceed that limit.
+      callWithRequest('cluster.getSettings', {
+        flatSettings: true,
+        includeDefaults: true,
+        filterPath: '*.*max_buckets'
+      })
+        .then((settings) => {
+          if (typeof settings !== 'object' || typeof settings.defaults !== 'object') {
+            reject('Unable to retrieve cluster setting search.max_buckets');
+          }

-        bucketSpanEstimator.run()
-          .then((resp) => {
-            resolve(resp);
-          })
-          .catch((resp) => {
-            reject(resp);
-          });
-      };
+          const maxBuckets = parseInt(settings.defaults['search.max_buckets']);

-      // a partition has been selected, so we need to load some field values to use in the
-      // bucket span tests.
-      if (formConfig.splitField !== undefined) {
-        getRandomFieldValues(formConfig.index, formConfig.splitField, formConfig.query)
-          .then((splitFieldValues) => {
-            runEstimator(splitFieldValues);
-          })
-          .catch((resp) => {
-            reject(resp);
-          });
-      } else {
-        // no partition field selected or we're in the single metric config
-        runEstimator();
-      }
+          const runEstimator = (splitFieldValues = []) => {
+            const bucketSpanEstimator = new BucketSpanEstimator(
+              formConfig,
+              splitFieldValues,
+              maxBuckets
+            );
+
+            bucketSpanEstimator.run()
+              .then((resp) => {
+                resolve(resp);
+              })
+              .catch((resp) => {
+                reject(resp);
+              });
+          };
+
+          // a partition has been selected, so we need to load some field values to use in the
+          // bucket span tests.
+          if (formConfig.splitField !== undefined) {
+            getRandomFieldValues(formConfig.index, formConfig.splitField, formConfig.query)
+              .then((splitFieldValues) => {
+                runEstimator(splitFieldValues);
+              })
+              .catch((resp) => {
+                reject(resp);
+              });
+          } else {
+            // no partition field selected or we're in the single metric config
+            runEstimator();
+          }
+        })
+        .catch((resp) => {
+          reject(resp);
+        });

    });
  };