Mirror of https://github.com/elastic/kibana.git, synced 2025-04-24 17:59:23 -04:00

[ML] Use a new ML endpoint to estimate a model memory (#60376)

* [ML] refactor calculate_model_memory_limit route, use estimateModelMemory endpoint
* [ML] refactor validate_model_memory_limit, migrate tests to jest
* [ML] fix typing issue
* [ML] start estimateModelMemory url with /
* [ML] fix typo, filter mlcategory
* [ML] extract getCardinalities function
* [ML] fields_service.ts
* [ML] wip getMaxBucketCardinality
* [ML] refactor and comments
* [ML] fix aggs keys with special characters, fix integration tests
* [ML] use pre-defined job types
* [ML] fallback to 0 in case max bucket cardinality receives null
* [ML] calculateModelMemoryLimit on influencers change
* [ML] fix maxModelMemoryLimit
* [ML] cap aggregation to max 1000 buckets
* [ML] rename intervalDuration

This commit is contained in:
parent ae0e35041e
commit 7aa4651292

19 changed files with 817 additions and 632 deletions
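Before the file-by-file diff, it may help to see the shape of the Elasticsearch `_estimate_model_memory` call this PR switches to. A minimal sketch of the request and response bodies (field names are taken from the diff below; the detector, influencer and cardinality values are hypothetical):

```ts
// POST /_ml/anomaly_detectors/_estimate_model_memory
// The request carries the job's analysis_config plus cardinalities that Kibana
// now computes up front via the fields service.
const estimateModelMemoryRequest = {
  analysis_config: {
    bucket_span: '15m',
    detectors: [
      { function: 'avg', field_name: 'products.base_price', partition_field_name: 'geoip.city_name' },
    ],
    influencers: ['geoip.city_name'],
  },
  // overall cardinality of by/partition/over fields across the requested time range
  overall_cardinality: { 'geoip.city_name': 1037 }, // hypothetical value
  // max per-bucket cardinality of influencer-only fields
  max_bucket_cardinality: {},
};

// The endpoint responds with a single estimate, which calculateModelMemoryLimit
// then caps at the cluster-wide max_model_memory_limit if one is configured.
const estimateModelMemoryResponse = { model_memory_estimate: '24mb' };
```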
@ -81,7 +81,7 @@ export interface ModelPlotConfig {

// TODO, finish this when it's needed
export interface CustomRule {
  actions: any;
  scope: object;
  conditions: object;
  actions: string[];
  scope?: object;
  conditions: any[];
}

@ -88,16 +88,13 @@ export class MultiMetricJobCreator extends JobCreator {

  // called externally to set the model memory limit based on the current detector configuration
  public async calculateModelMemoryLimit() {
    if (this._splitField === null) {
      // no split field, use the default
    if (this.jobConfig.analysis_config.detectors.length === 0) {
      this.modelMemoryLimit = DEFAULT_MODEL_MEMORY_LIMIT;
    } else {
      const { modelMemoryLimit } = await ml.calculateModelMemoryLimit({
        analysisConfig: this.jobConfig.analysis_config,
        indexPattern: this._indexPatternTitle,
        splitFieldName: this._splitField.name,
        query: this._datafeed_config.query,
        fieldNames: this.fields.map(f => f.id),
        influencerNames: this._influencers,
        timeFieldName: this._job_config.data_description.time_field,
        earliestMs: this._start,
        latestMs: this._end,

@ -22,6 +22,7 @@ import {
  Datafeed,
  CombinedJob,
  Detector,
  AnalysisConfig,
} from '../../../../common/types/anomaly_detection_jobs';
import { ES_AGGREGATION } from '../../../../common/constants/aggregation_types';
import { FieldRequestConfig } from '../../datavisualizer/index_based/common';

@ -532,30 +533,24 @@ export const ml = {
  },

  calculateModelMemoryLimit({
    analysisConfig,
    indexPattern,
    splitFieldName,
    query,
    fieldNames,
    influencerNames,
    timeFieldName,
    earliestMs,
    latestMs,
  }: {
    analysisConfig: AnalysisConfig;
    indexPattern: string;
    splitFieldName: string;
    query: any;
    fieldNames: string[];
    influencerNames: string[];
    timeFieldName: string;
    earliestMs: number;
    latestMs: number;
  }) {
    const body = JSON.stringify({
      analysisConfig,
      indexPattern,
      splitFieldName,
      query,
      fieldNames,
      influencerNames,
      timeFieldName,
      earliestMs,
      latestMs,

@ -413,6 +413,14 @@ export const elasticsearchJsPlugin = (Client: any, config: any, components: any)
      method: 'POST',
    });

    ml.estimateModelMemory = ca({
      url: {
        fmt: '/_ml/anomaly_detectors/_estimate_model_memory',
      },
      needBody: true,
      method: 'POST',
    });

    ml.datafeedPreview = ca({
      url: {
        fmt: '/_ml/datafeeds/<%=datafeedId%>/_preview',

@ -1,20 +0,0 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License;
|
||||
* you may not use this file except in compliance with the Elastic License.
|
||||
*/
|
||||
|
||||
import { APICaller } from 'kibana/server';
|
||||
|
||||
export function calculateModelMemoryLimitProvider(
|
||||
callAsCurrentUser: APICaller
|
||||
): (
|
||||
indexPattern: string,
|
||||
splitFieldName: string,
|
||||
query: any,
|
||||
fieldNames: any,
|
||||
influencerNames: any, // string[] ?
|
||||
timeFieldName: string,
|
||||
earliestMs: number,
|
||||
latestMs: number
|
||||
) => Promise<any>;
|
|
@ -1,117 +0,0 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License;
|
||||
* you may not use this file except in compliance with the Elastic License.
|
||||
*/
|
||||
|
||||
// calculates the size of the model memory limit used in the job config
|
||||
// based on the cardinality of the field being used to split the data.
|
||||
// the limit should be 10MB plus 20kB per series, rounded up to the nearest MB.
|
||||
import numeral from '@elastic/numeral';
|
||||
import { fieldsServiceProvider } from '../fields_service';
|
||||
|
||||
export function calculateModelMemoryLimitProvider(callAsCurrentUser) {
|
||||
const fieldsService = fieldsServiceProvider(callAsCurrentUser);
|
||||
|
||||
return function calculateModelMemoryLimit(
|
||||
indexPattern,
|
||||
splitFieldName,
|
||||
query,
|
||||
fieldNames,
|
||||
influencerNames,
|
||||
timeFieldName,
|
||||
earliestMs,
|
||||
latestMs,
|
||||
allowMMLGreaterThanMax = false
|
||||
) {
|
||||
return new Promise((response, reject) => {
|
||||
const limits = {};
|
||||
callAsCurrentUser('ml.info')
|
||||
.then(resp => {
|
||||
if (resp.limits !== undefined && resp.limits.max_model_memory_limit !== undefined) {
|
||||
limits.max_model_memory_limit = resp.limits.max_model_memory_limit;
|
||||
}
|
||||
})
|
||||
.catch(error => {
|
||||
reject(error);
|
||||
});
|
||||
|
||||
// find the cardinality of the split field
|
||||
function splitFieldCardinality() {
|
||||
return fieldsService.getCardinalityOfFields(
|
||||
indexPattern,
|
||||
[splitFieldName],
|
||||
query,
|
||||
timeFieldName,
|
||||
earliestMs,
|
||||
latestMs
|
||||
);
|
||||
}
|
||||
|
||||
// find the cardinality of an influencer field
|
||||
function influencerCardinality(influencerName) {
|
||||
return fieldsService.getCardinalityOfFields(
|
||||
indexPattern,
|
||||
[influencerName],
|
||||
query,
|
||||
timeFieldName,
|
||||
earliestMs,
|
||||
latestMs
|
||||
);
|
||||
}
|
||||
|
||||
const calculations = [
|
||||
splitFieldCardinality(),
|
||||
...influencerNames.map(inf => influencerCardinality(inf)),
|
||||
];
|
||||
|
||||
Promise.all(calculations)
|
||||
.then(responses => {
|
||||
let mmlMB = 0;
|
||||
const MB = 1000;
|
||||
responses.forEach((resp, i) => {
|
||||
let mmlKB = 0;
|
||||
if (i === 0) {
|
||||
// first in the list is the basic calculation.
|
||||
// a base of 10MB plus 64KB per series per detector
|
||||
// i.e. 10000KB + (64KB * cardinality of split field * number or detectors)
|
||||
const cardinality = resp[splitFieldName];
|
||||
mmlKB = 10000;
|
||||
const SERIES_MULTIPLIER = 64;
|
||||
const numberOfFields = fieldNames.length;
|
||||
|
||||
if (cardinality !== undefined) {
|
||||
mmlKB += SERIES_MULTIPLIER * cardinality * numberOfFields;
|
||||
}
|
||||
} else {
|
||||
// the rest of the calculations are for influencers fields
|
||||
// 10KB per series of influencer field
|
||||
// i.e. 10KB * cardinality of influencer field
|
||||
const cardinality = resp[splitFieldName];
|
||||
mmlKB = 0;
|
||||
const SERIES_MULTIPLIER = 10;
|
||||
if (cardinality !== undefined) {
|
||||
mmlKB = SERIES_MULTIPLIER * cardinality;
|
||||
}
|
||||
}
|
||||
// convert the total to MB, rounding up.
|
||||
mmlMB += Math.ceil(mmlKB / MB);
|
||||
});
|
||||
|
||||
// if max_model_memory_limit has been set,
|
||||
// make sure the estimated value is not greater than it.
|
||||
if (allowMMLGreaterThanMax === false && limits.max_model_memory_limit !== undefined) {
|
||||
const maxBytes = numeral(limits.max_model_memory_limit.toUpperCase()).value();
|
||||
const mmlBytes = numeral(`${mmlMB}MB`).value();
|
||||
if (mmlBytes > maxBytes) {
|
||||
mmlMB = Math.floor(maxBytes / numeral('1MB').value());
|
||||
}
|
||||
}
|
||||
response({ modelMemoryLimit: `${mmlMB}MB` });
|
||||
})
|
||||
.catch(error => {
|
||||
reject(error);
|
||||
});
|
||||
});
|
||||
};
|
||||
}
|
|
@ -0,0 +1,187 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */

import numeral from '@elastic/numeral';
import { APICaller } from 'kibana/server';
import { AnalysisConfig } from '../../../common/types/anomaly_detection_jobs';
import { fieldsServiceProvider } from '../fields_service';

interface ModelMemoryEstimationResult {
  /**
   * Result model memory limit
   */
  modelMemoryLimit: string;
  /**
   * Estimated model memory by elasticsearch ml endpoint
   */
  estimatedModelMemoryLimit: string;
  /**
   * Maximum model memory limit
   */
  maxModelMemoryLimit?: string;
}

/**
 * Response of the _estimate_model_memory endpoint.
 */
export interface ModelMemoryEstimate {
  model_memory_estimate: string;
}

/**
 * Retrieves overall and max bucket cardinalities.
 */
async function getCardinalities(
  callAsCurrentUser: APICaller,
  analysisConfig: AnalysisConfig,
  indexPattern: string,
  query: any,
  timeFieldName: string,
  earliestMs: number,
  latestMs: number
): Promise<{
  overallCardinality: { [key: string]: number };
  maxBucketCardinality: { [key: string]: number };
}> {
  /**
   * Fields not involved in cardinality check
   */
  const excludedKeywords = new Set<string>(
    /**
     * The keyword which is used to mean the output of categorization,
     * so it will have cardinality zero in the actual input data.
     */
    'mlcategory'
  );

  const fieldsService = fieldsServiceProvider(callAsCurrentUser);

  const { detectors, influencers, bucket_span: bucketSpan } = analysisConfig;

  let overallCardinality = {};
  let maxBucketCardinality = {};
  const overallCardinalityFields: Set<string> = detectors.reduce(
    (
      acc,
      {
        by_field_name: byFieldName,
        partition_field_name: partitionFieldName,
        over_field_name: overFieldName,
      }
    ) => {
      [byFieldName, partitionFieldName, overFieldName]
        .filter(field => field !== undefined && field !== '' && !excludedKeywords.has(field))
        .forEach(key => {
          acc.add(key as string);
        });
      return acc;
    },
    new Set<string>()
  );

  const maxBucketFieldCardinalities: string[] = influencers.filter(
    influencerField =>
      typeof influencerField === 'string' &&
      !excludedKeywords.has(influencerField) &&
      !!influencerField &&
      !overallCardinalityFields.has(influencerField)
  ) as string[];

  if (overallCardinalityFields.size > 0) {
    overallCardinality = await fieldsService.getCardinalityOfFields(
      indexPattern,
      [...overallCardinalityFields],
      query,
      timeFieldName,
      earliestMs,
      latestMs
    );
  }

  if (maxBucketFieldCardinalities.length > 0) {
    maxBucketCardinality = await fieldsService.getMaxBucketCardinalities(
      indexPattern,
      maxBucketFieldCardinalities,
      query,
      timeFieldName,
      earliestMs,
      latestMs,
      bucketSpan
    );
  }

  return {
    overallCardinality,
    maxBucketCardinality,
  };
}

export function calculateModelMemoryLimitProvider(callAsCurrentUser: APICaller) {
  /**
   * Retrieves an estimated size of the model memory limit used in the job config
   * based on the cardinality of the fields being used to split the data
   * and influencers.
   */
  return async function calculateModelMemoryLimit(
    analysisConfig: AnalysisConfig,
    indexPattern: string,
    query: any,
    timeFieldName: string,
    earliestMs: number,
    latestMs: number,
    allowMMLGreaterThanMax = false
  ): Promise<ModelMemoryEstimationResult> {
    let maxModelMemoryLimit;
    try {
      const resp = await callAsCurrentUser('ml.info');
      if (resp?.limits?.max_model_memory_limit !== undefined) {
        maxModelMemoryLimit = resp.limits.max_model_memory_limit.toUpperCase();
      }
    } catch (e) {
      throw new Error('Unable to retrieve max model memory limit');
    }

    const { overallCardinality, maxBucketCardinality } = await getCardinalities(
      callAsCurrentUser,
      analysisConfig,
      indexPattern,
      query,
      timeFieldName,
      earliestMs,
      latestMs
    );

    const estimatedModelMemoryLimit = (
      await callAsCurrentUser<ModelMemoryEstimate>('ml.estimateModelMemory', {
        body: {
          analysis_config: analysisConfig,
          overall_cardinality: overallCardinality,
          max_bucket_cardinality: maxBucketCardinality,
        },
      })
    ).model_memory_estimate.toUpperCase();

    let modelMemoryLimit: string = estimatedModelMemoryLimit;
    // if max_model_memory_limit has been set,
    // make sure the estimated value is not greater than it.
    if (!allowMMLGreaterThanMax && maxModelMemoryLimit !== undefined) {
      // @ts-ignore
      const maxBytes = numeral(maxModelMemoryLimit).value();
      // @ts-ignore
      const mmlBytes = numeral(estimatedModelMemoryLimit).value();
      if (mmlBytes > maxBytes) {
        // @ts-ignore
        modelMemoryLimit = `${Math.floor(maxBytes / numeral('1MB').value())}MB`;
      }
    }

    return {
      estimatedModelMemoryLimit,
      modelMemoryLimit,
      ...(maxModelMemoryLimit ? { maxModelMemoryLimit } : {}),
    };
  };
}
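A minimal usage sketch of the provider added above, assuming the caller already has a scoped `callAsCurrentUser` client and a job's `analysis_config` in hand; the index pattern, query, time field and time range below are hypothetical, and the import path is the one used by the validation code in this PR:

```ts
import { APICaller } from 'kibana/server';
import { AnalysisConfig } from '../../../common/types/anomaly_detection_jobs';
import { calculateModelMemoryLimitProvider } from '../calculate_model_memory_limit';

// Argument order follows the provider signature above:
// analysisConfig, indexPattern, query, timeFieldName, earliestMs, latestMs, allowMMLGreaterThanMax.
async function estimateMml(callAsCurrentUser: APICaller, analysisConfig: AnalysisConfig) {
  const calculateModelMemoryLimit = calculateModelMemoryLimitProvider(callAsCurrentUser);
  // Resolves to { estimatedModelMemoryLimit, modelMemoryLimit, maxModelMemoryLimit? }.
  return calculateModelMemoryLimit(
    analysisConfig,
    'ecommerce',          // hypothetical index pattern
    { match_all: {} },    // hypothetical datafeed query
    'order_date',         // hypothetical time field
    1560297859000,
    1562975136000
  );
}
```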
@ -1,21 +0,0 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License;
|
||||
* you may not use this file except in compliance with the Elastic License.
|
||||
*/
|
||||
|
||||
import { APICaller } from 'kibana/server';
|
||||
|
||||
export function fieldsServiceProvider(
|
||||
callAsCurrentUser: APICaller
|
||||
): {
|
||||
getCardinalityOfFields: (
|
||||
index: string[] | string,
|
||||
fieldNames: string[],
|
||||
query: any,
|
||||
timeFieldName: string,
|
||||
earliestMs: number,
|
||||
latestMs: number
|
||||
) => Promise<any>;
|
||||
getTimeFieldRange: (index: string[] | string, timeFieldName: string, query: any) => Promise<any>;
|
||||
};
|
|
@ -1,148 +0,0 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License;
|
||||
* you may not use this file except in compliance with the Elastic License.
|
||||
*/
|
||||
|
||||
// Service for carrying out queries to obtain data
|
||||
// specific to fields in Elasticsearch indices.
|
||||
|
||||
export function fieldsServiceProvider(callAsCurrentUser) {
|
||||
// Obtains the cardinality of one or more fields.
|
||||
// Returns an Object whose keys are the names of the fields,
|
||||
// with values equal to the cardinality of the field.
|
||||
// Any of the supplied fieldNames which are not aggregatable will
|
||||
// be omitted from the returned Object.
|
||||
function getCardinalityOfFields(index, fieldNames, query, timeFieldName, earliestMs, latestMs) {
|
||||
// First check that each of the supplied fieldNames are aggregatable,
|
||||
// then obtain the cardinality for each of the aggregatable fields.
|
||||
return new Promise((resolve, reject) => {
|
||||
callAsCurrentUser('fieldCaps', {
|
||||
index,
|
||||
fields: fieldNames,
|
||||
})
|
||||
.then(fieldCapsResp => {
|
||||
const aggregatableFields = [];
|
||||
fieldNames.forEach(fieldName => {
|
||||
const fieldInfo = fieldCapsResp.fields[fieldName];
|
||||
const typeKeys = fieldInfo !== undefined ? Object.keys(fieldInfo) : [];
|
||||
if (typeKeys.length > 0) {
|
||||
const fieldType = typeKeys[0];
|
||||
const isFieldAggregatable = fieldInfo[fieldType].aggregatable;
|
||||
if (isFieldAggregatable === true) {
|
||||
aggregatableFields.push(fieldName);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
if (aggregatableFields.length > 0) {
|
||||
// Build the criteria to use in the bool filter part of the request.
|
||||
// Add criteria for the time range and the datafeed config query.
|
||||
const mustCriteria = [
|
||||
{
|
||||
range: {
|
||||
[timeFieldName]: {
|
||||
gte: earliestMs,
|
||||
lte: latestMs,
|
||||
format: 'epoch_millis',
|
||||
},
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
if (query) {
|
||||
mustCriteria.push(query);
|
||||
}
|
||||
|
||||
const aggs = aggregatableFields.reduce((obj, field) => {
|
||||
obj[field] = { cardinality: { field } };
|
||||
return obj;
|
||||
}, {});
|
||||
|
||||
const body = {
|
||||
query: {
|
||||
bool: {
|
||||
must: mustCriteria,
|
||||
},
|
||||
},
|
||||
size: 0,
|
||||
_source: {
|
||||
excludes: [],
|
||||
},
|
||||
aggs,
|
||||
};
|
||||
|
||||
callAsCurrentUser('search', {
|
||||
index,
|
||||
body,
|
||||
})
|
||||
.then(resp => {
|
||||
const aggregations = resp.aggregations;
|
||||
if (aggregations !== undefined) {
|
||||
const results = aggregatableFields.reduce((obj, field) => {
|
||||
obj[field] = (aggregations[field] || { value: 0 }).value;
|
||||
return obj;
|
||||
}, {});
|
||||
resolve(results);
|
||||
} else {
|
||||
resolve({});
|
||||
}
|
||||
})
|
||||
.catch(resp => {
|
||||
reject(resp);
|
||||
});
|
||||
} else {
|
||||
// None of the fields are aggregatable. Return empty Object.
|
||||
resolve({});
|
||||
}
|
||||
})
|
||||
.catch(resp => {
|
||||
reject(resp);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
function getTimeFieldRange(index, timeFieldName, query) {
|
||||
return new Promise((resolve, reject) => {
|
||||
const obj = { success: true, start: { epoch: 0, string: '' }, end: { epoch: 0, string: '' } };
|
||||
|
||||
callAsCurrentUser('search', {
|
||||
index,
|
||||
size: 0,
|
||||
body: {
|
||||
query,
|
||||
aggs: {
|
||||
earliest: {
|
||||
min: {
|
||||
field: timeFieldName,
|
||||
},
|
||||
},
|
||||
latest: {
|
||||
max: {
|
||||
field: timeFieldName,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
.then(resp => {
|
||||
if (resp.aggregations && resp.aggregations.earliest && resp.aggregations.latest) {
|
||||
obj.start.epoch = resp.aggregations.earliest.value;
|
||||
obj.start.string = resp.aggregations.earliest.value_as_string;
|
||||
|
||||
obj.end.epoch = resp.aggregations.latest.value;
|
||||
obj.end.string = resp.aggregations.latest.value_as_string;
|
||||
}
|
||||
resolve(obj);
|
||||
})
|
||||
.catch(resp => {
|
||||
reject(resp);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
return {
|
||||
getCardinalityOfFields,
|
||||
getTimeFieldRange,
|
||||
};
|
||||
}
|
296 x-pack/plugins/ml/server/models/fields_service/fields_service.ts Normal file
@ -0,0 +1,296 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License;
|
||||
* you may not use this file except in compliance with the Elastic License.
|
||||
*/
|
||||
|
||||
import Boom from 'boom';
|
||||
import { APICaller } from 'kibana/server';
|
||||
import { parseInterval } from '../../../common/util/parse_interval';
|
||||
|
||||
/**
|
||||
* Service for carrying out queries to obtain data
|
||||
* specific to fields in Elasticsearch indices.
|
||||
*/
|
||||
export function fieldsServiceProvider(callAsCurrentUser: APICaller) {
|
||||
/**
|
||||
* Gets aggregatable fields.
|
||||
*/
|
||||
async function getAggregatableFields(
|
||||
index: string | string[],
|
||||
fieldNames: string[]
|
||||
): Promise<string[]> {
|
||||
const fieldCapsResp = await callAsCurrentUser('fieldCaps', {
|
||||
index,
|
||||
fields: fieldNames,
|
||||
});
|
||||
const aggregatableFields: string[] = [];
|
||||
fieldNames.forEach(fieldName => {
|
||||
const fieldInfo = fieldCapsResp.fields[fieldName];
|
||||
const typeKeys = fieldInfo !== undefined ? Object.keys(fieldInfo) : [];
|
||||
if (typeKeys.length > 0) {
|
||||
const fieldType = typeKeys[0];
|
||||
const isFieldAggregatable = fieldInfo[fieldType].aggregatable;
|
||||
if (isFieldAggregatable === true) {
|
||||
aggregatableFields.push(fieldName);
|
||||
}
|
||||
}
|
||||
});
|
||||
return aggregatableFields;
|
||||
}
|
||||
|
||||
// Obtains the cardinality of one or more fields.
|
||||
// Returns an Object whose keys are the names of the fields,
|
||||
// with values equal to the cardinality of the field.
|
||||
// Any of the supplied fieldNames which are not aggregatable will
|
||||
// be omitted from the returned Object.
|
||||
async function getCardinalityOfFields(
|
||||
index: string[] | string,
|
||||
fieldNames: string[],
|
||||
query: any,
|
||||
timeFieldName: string,
|
||||
earliestMs: number,
|
||||
latestMs: number
|
||||
): Promise<{ [key: string]: number }> {
|
||||
const aggregatableFields = await getAggregatableFields(index, fieldNames);
|
||||
|
||||
if (aggregatableFields.length === 0) {
|
||||
return {};
|
||||
}
|
||||
|
||||
// Build the criteria to use in the bool filter part of the request.
|
||||
// Add criteria for the time range and the datafeed config query.
|
||||
const mustCriteria = [
|
||||
{
|
||||
range: {
|
||||
[timeFieldName]: {
|
||||
gte: earliestMs,
|
||||
lte: latestMs,
|
||||
format: 'epoch_millis',
|
||||
},
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
if (query) {
|
||||
mustCriteria.push(query);
|
||||
}
|
||||
|
||||
const aggs = aggregatableFields.reduce((obj, field) => {
|
||||
obj[field] = { cardinality: { field } };
|
||||
return obj;
|
||||
}, {} as { [field: string]: { cardinality: { field: string } } });
|
||||
|
||||
const body = {
|
||||
query: {
|
||||
bool: {
|
||||
must: mustCriteria,
|
||||
},
|
||||
},
|
||||
size: 0,
|
||||
_source: {
|
||||
excludes: [],
|
||||
},
|
||||
aggs,
|
||||
};
|
||||
|
||||
const aggregations = (
|
||||
await callAsCurrentUser('search', {
|
||||
index,
|
||||
body,
|
||||
})
|
||||
)?.aggregations;
|
||||
|
||||
if (!aggregations) {
|
||||
return {};
|
||||
}
|
||||
|
||||
return aggregatableFields.reduce((obj, field) => {
|
||||
obj[field] = (aggregations[field] || { value: 0 }).value;
|
||||
return obj;
|
||||
}, {} as { [field: string]: number });
|
||||
}
|
||||
|
||||
function getTimeFieldRange(
|
||||
index: string[] | string,
|
||||
timeFieldName: string,
|
||||
query: any
|
||||
): Promise<any> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const obj = { success: true, start: { epoch: 0, string: '' }, end: { epoch: 0, string: '' } };
|
||||
|
||||
callAsCurrentUser('search', {
|
||||
index,
|
||||
size: 0,
|
||||
body: {
|
||||
query,
|
||||
aggs: {
|
||||
earliest: {
|
||||
min: {
|
||||
field: timeFieldName,
|
||||
},
|
||||
},
|
||||
latest: {
|
||||
max: {
|
||||
field: timeFieldName,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
.then(resp => {
|
||||
if (resp.aggregations && resp.aggregations.earliest && resp.aggregations.latest) {
|
||||
obj.start.epoch = resp.aggregations.earliest.value;
|
||||
obj.start.string = resp.aggregations.earliest.value_as_string;
|
||||
|
||||
obj.end.epoch = resp.aggregations.latest.value;
|
||||
obj.end.string = resp.aggregations.latest.value_as_string;
|
||||
}
|
||||
resolve(obj);
|
||||
})
|
||||
.catch(resp => {
|
||||
reject(resp);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Caps provided time boundaries based on the interval.
|
||||
* @param earliestMs
|
||||
* @param latestMs
|
||||
* @param interval
|
||||
*/
|
||||
function getSafeTimeRange(
|
||||
earliestMs: number,
|
||||
latestMs: number,
|
||||
interval: string
|
||||
): { start: number; end: number } {
|
||||
const maxNumberOfBuckets = 1000;
|
||||
const end = latestMs;
|
||||
|
||||
const intervalDuration = parseInterval(interval);
|
||||
|
||||
if (intervalDuration === null) {
|
||||
throw Boom.badRequest('Interval is invalid');
|
||||
}
|
||||
|
||||
const start = Math.max(
|
||||
earliestMs,
|
||||
latestMs - maxNumberOfBuckets * intervalDuration.asMilliseconds()
|
||||
);
|
||||
|
||||
return { start, end };
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves max cardinalities for provided fields from date interval buckets
|
||||
* using max bucket pipeline aggregation.
|
||||
*
|
||||
* @param index
|
||||
* @param fieldNames - fields to perform cardinality aggregation on
|
||||
* @param query
|
||||
* @param timeFieldName
|
||||
* @param earliestMs
|
||||
* @param latestMs
|
||||
* @param interval - a fixed interval for the date histogram aggregation
|
||||
*/
|
||||
async function getMaxBucketCardinalities(
|
||||
index: string[] | string,
|
||||
fieldNames: string[],
|
||||
query: any,
|
||||
timeFieldName: string,
|
||||
earliestMs: number,
|
||||
latestMs: number,
|
||||
interval: string | undefined
|
||||
): Promise<{ [key: string]: number }> {
|
||||
if (!interval) {
|
||||
throw new Error('Interval is required to retrieve max bucket cardinalities.');
|
||||
}
|
||||
|
||||
const aggregatableFields = await getAggregatableFields(index, fieldNames);
|
||||
|
||||
if (aggregatableFields.length === 0) {
|
||||
return {};
|
||||
}
|
||||
|
||||
const { start, end } = getSafeTimeRange(earliestMs, latestMs, interval);
|
||||
|
||||
const mustCriteria = [
|
||||
{
|
||||
range: {
|
||||
[timeFieldName]: {
|
||||
gte: start,
|
||||
lte: end,
|
||||
format: 'epoch_millis',
|
||||
},
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
if (query) {
|
||||
mustCriteria.push(query);
|
||||
}
|
||||
|
||||
const dateHistogramAggKey = 'bucket_span_buckets';
|
||||
/**
|
||||
* Replace any non-word characters
|
||||
*/
|
||||
const getSafeAggName = (field: string) => field.replace(/\W/g, '');
|
||||
const getMaxBucketAggKey = (field: string) => `max_bucket_${field}`;
|
||||
|
||||
const fieldsCardinalityAggs = aggregatableFields.reduce((obj, field) => {
|
||||
obj[getSafeAggName(field)] = { cardinality: { field } };
|
||||
return obj;
|
||||
}, {} as { [field: string]: { cardinality: { field: string } } });
|
||||
|
||||
const maxBucketCardinalitiesAggs = Object.keys(fieldsCardinalityAggs).reduce((acc, field) => {
|
||||
acc[getMaxBucketAggKey(field)] = {
|
||||
max_bucket: {
|
||||
buckets_path: `${dateHistogramAggKey}>${field}`,
|
||||
},
|
||||
};
|
||||
return acc;
|
||||
}, {} as { [key: string]: { max_bucket: { buckets_path: string } } });
|
||||
|
||||
const body = {
|
||||
query: {
|
||||
bool: {
|
||||
filter: mustCriteria,
|
||||
},
|
||||
},
|
||||
size: 0,
|
||||
aggs: {
|
||||
[dateHistogramAggKey]: {
|
||||
date_histogram: {
|
||||
field: timeFieldName,
|
||||
fixed_interval: interval,
|
||||
},
|
||||
aggs: fieldsCardinalityAggs,
|
||||
},
|
||||
...maxBucketCardinalitiesAggs,
|
||||
},
|
||||
};
|
||||
|
||||
const aggregations = (
|
||||
await callAsCurrentUser('search', {
|
||||
index,
|
||||
body,
|
||||
})
|
||||
)?.aggregations;
|
||||
|
||||
if (!aggregations) {
|
||||
return {};
|
||||
}
|
||||
|
||||
return aggregatableFields.reduce((obj, field) => {
|
||||
obj[field] = (aggregations[getMaxBucketAggKey(field)] || { value: 0 }).value ?? 0;
|
||||
return obj;
|
||||
}, {} as { [field: string]: number });
|
||||
}
|
||||
|
||||
return {
|
||||
getCardinalityOfFields,
|
||||
getTimeFieldRange,
|
||||
getMaxBucketCardinalities,
|
||||
};
|
||||
}
|
|
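One detail worth calling out from the new fields_service.ts above: `getSafeTimeRange` caps the date histogram used for the max-bucket-cardinality search at 1000 buckets (the "cap aggregation to max 1000 buckets" item in the commit message). A small worked sketch of that arithmetic, with a hypothetical 15m bucket span and time range:

```ts
// With a 15m bucket span, 1000 buckets cover 1000 * 15 * 60 * 1000 ms = 900,000,000 ms (~10.4 days).
const maxNumberOfBuckets = 1000;
const bucketSpanMs = 15 * 60 * 1000;  // hypothetical '15m' bucket span
const earliestMs = 1560297859000;     // hypothetical datafeed start
const latestMs = 1562975136000;       // hypothetical datafeed end, ~31 days later

// Same capping rule as getSafeTimeRange: keep the end, pull the start forward if needed.
const end = latestMs;
const start = Math.max(earliestMs, latestMs - maxNumberOfBuckets * bucketSpanMs);
// start === 1562075136000 here, i.e. only the last ~10.4 days are scanned.
```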
@ -5,8 +5,9 @@
 */

import { i18n } from '@kbn/i18n';
import { CombinedJob } from '../../../common/types/anomaly_detection_jobs';

export function validateJobObject(job) {
export function validateJobObject(job: CombinedJob | null) {
  if (job === null || typeof job !== 'object') {
    throw new Error(
      i18n.translate('xpack.ml.models.jobValidation.validateJobObject.jobIsNotObjectErrorMessage', {

@ -1,170 +0,0 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License;
|
||||
* you may not use this file except in compliance with the Elastic License.
|
||||
*/
|
||||
|
||||
import numeral from '@elastic/numeral';
|
||||
import { validateJobObject } from './validate_job_object';
|
||||
import { calculateModelMemoryLimitProvider } from '../../models/calculate_model_memory_limit';
|
||||
import { ALLOWED_DATA_UNITS } from '../../../common/constants/validation';
|
||||
|
||||
// The minimum value the backend expects is 1MByte
|
||||
const MODEL_MEMORY_LIMIT_MINIMUM_BYTES = 1048576;
|
||||
|
||||
export async function validateModelMemoryLimit(callWithRequest, job, duration) {
|
||||
validateJobObject(job);
|
||||
|
||||
// retrieve the max_model_memory_limit value from the server
|
||||
// this will be unset unless the user has set this on their cluster
|
||||
const mlInfo = await callWithRequest('ml.info');
|
||||
const maxModelMemoryLimit =
|
||||
typeof mlInfo.limits === 'undefined' ? undefined : mlInfo.limits.max_model_memory_limit;
|
||||
|
||||
// retrieve the model memory limit specified by the user in the job config.
|
||||
// note, this will probably be the auto generated value, unless the user has
|
||||
// over written it.
|
||||
const mml =
|
||||
typeof job.analysis_limits !== 'undefined' &&
|
||||
typeof job.analysis_limits.model_memory_limit !== 'undefined'
|
||||
? job.analysis_limits.model_memory_limit.toUpperCase()
|
||||
: null;
|
||||
|
||||
const splitFieldNames = {};
|
||||
let splitFieldName = '';
|
||||
const fieldNames = [];
|
||||
let runCalcModelMemoryTest = true;
|
||||
let validModelMemoryLimit = true;
|
||||
|
||||
// extract the field names and partition field names from the detectors
|
||||
// we only want to estimate the mml for multi-metric jobs.
|
||||
// a multi-metric job will have one partition field, one or more field names
|
||||
// and no over or by fields
|
||||
job.analysis_config.detectors.forEach(d => {
|
||||
if (typeof d.field_name !== 'undefined') {
|
||||
fieldNames.push(d.field_name);
|
||||
}
|
||||
|
||||
// create a deduplicated list of partition field names.
|
||||
if (typeof d.partition_field_name !== 'undefined') {
|
||||
splitFieldNames[d.partition_field_name] = null;
|
||||
}
|
||||
|
||||
// if an over or by field is present, do not run the estimate test
|
||||
if (typeof d.over_field_name !== 'undefined' || typeof d.by_field_name !== 'undefined') {
|
||||
runCalcModelMemoryTest = false;
|
||||
}
|
||||
});
|
||||
|
||||
// if there are no or more than one partition fields, do not run the test
|
||||
if (Object.keys(splitFieldNames).length === 1) {
|
||||
splitFieldName = Object.keys(splitFieldNames)[0];
|
||||
} else {
|
||||
runCalcModelMemoryTest = false;
|
||||
}
|
||||
|
||||
// if there is no duration, do not run the estimate test
|
||||
if (
|
||||
typeof duration === 'undefined' ||
|
||||
typeof duration.start === 'undefined' ||
|
||||
typeof duration.end === 'undefined'
|
||||
) {
|
||||
runCalcModelMemoryTest = false;
|
||||
}
|
||||
|
||||
const messages = [];
|
||||
|
||||
// check that mml is a valid data format
|
||||
if (mml !== null) {
|
||||
const mmlSplit = mml.match(/\d+(\w+)/);
|
||||
const unit = mmlSplit && mmlSplit.length === 2 ? mmlSplit[1] : null;
|
||||
|
||||
if (ALLOWED_DATA_UNITS.indexOf(unit) === -1) {
|
||||
messages.push({
|
||||
id: 'mml_value_invalid',
|
||||
mml,
|
||||
});
|
||||
// mml is not a valid data format.
|
||||
// abort all other tests
|
||||
validModelMemoryLimit = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (validModelMemoryLimit) {
|
||||
if (runCalcModelMemoryTest) {
|
||||
const mmlEstimate = await calculateModelMemoryLimitProvider(callWithRequest)(
|
||||
job.datafeed_config.indices.join(','),
|
||||
splitFieldName,
|
||||
job.datafeed_config.query,
|
||||
fieldNames,
|
||||
job.analysis_config.influencers,
|
||||
job.data_description.time_field,
|
||||
duration.start,
|
||||
duration.end,
|
||||
true
|
||||
);
|
||||
const mmlEstimateBytes = numeral(mmlEstimate.modelMemoryLimit).value();
|
||||
|
||||
let runEstimateGreaterThenMml = true;
|
||||
// if max_model_memory_limit has been set,
|
||||
// make sure the estimated value is not greater than it.
|
||||
if (typeof maxModelMemoryLimit !== 'undefined') {
|
||||
const maxMmlBytes = numeral(maxModelMemoryLimit.toUpperCase()).value();
|
||||
if (mmlEstimateBytes > maxMmlBytes) {
|
||||
runEstimateGreaterThenMml = false;
|
||||
messages.push({
|
||||
id: 'estimated_mml_greater_than_max_mml',
|
||||
maxModelMemoryLimit,
|
||||
mmlEstimate,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// check to see if the estimated mml is greater that the user
|
||||
// specified mml
|
||||
// do not run this if we've already found that it's larger than
|
||||
// the max mml
|
||||
if (runEstimateGreaterThenMml && mml !== null) {
|
||||
const mmlBytes = numeral(mml).value();
|
||||
if (mmlBytes < MODEL_MEMORY_LIMIT_MINIMUM_BYTES) {
|
||||
messages.push({
|
||||
id: 'mml_value_invalid',
|
||||
mml,
|
||||
});
|
||||
} else if (mmlEstimateBytes / 2 > mmlBytes) {
|
||||
messages.push({
|
||||
id: 'half_estimated_mml_greater_than_mml',
|
||||
maxModelMemoryLimit,
|
||||
mml,
|
||||
});
|
||||
} else if (mmlEstimateBytes > mmlBytes) {
|
||||
messages.push({
|
||||
id: 'estimated_mml_greater_than_mml',
|
||||
maxModelMemoryLimit,
|
||||
mml,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// if max_model_memory_limit has been set,
|
||||
// make sure the user defined MML is not greater than it
|
||||
if (maxModelMemoryLimit !== undefined && mml !== null) {
|
||||
const maxMmlBytes = numeral(maxModelMemoryLimit.toUpperCase()).value();
|
||||
const mmlBytes = numeral(mml).value();
|
||||
if (mmlBytes > maxMmlBytes) {
|
||||
messages.push({
|
||||
id: 'mml_greater_than_max_mml',
|
||||
maxModelMemoryLimit,
|
||||
mml,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (messages.length === 0 && runCalcModelMemoryTest === true) {
|
||||
messages.push({ id: 'success_mml' });
|
||||
}
|
||||
|
||||
return Promise.resolve(messages);
|
||||
}
|
|
@ -4,8 +4,10 @@
|
|||
* you may not use this file except in compliance with the Elastic License.
|
||||
*/
|
||||
|
||||
import expect from '@kbn/expect';
|
||||
import { validateModelMemoryLimit } from '../validate_model_memory_limit';
|
||||
import { APICaller } from 'kibana/server';
|
||||
import { CombinedJob, Detector } from '../../../common/types/anomaly_detection_jobs';
|
||||
import { ModelMemoryEstimate } from '../calculate_model_memory_limit/calculate_model_memory_limit';
|
||||
import { validateModelMemoryLimit } from './validate_model_memory_limit';
|
||||
|
||||
describe('ML - validateModelMemoryLimit', () => {
|
||||
// mock info endpoint response
|
||||
|
@ -61,29 +63,43 @@ describe('ML - validateModelMemoryLimit', () => {
|
|||
},
|
||||
};
|
||||
|
||||
// mock estimate model memory
|
||||
const modelMemoryEstimateResponse: ModelMemoryEstimate = {
|
||||
model_memory_estimate: '40mb',
|
||||
};
|
||||
|
||||
interface MockAPICallResponse {
|
||||
'ml.estimateModelMemory'?: ModelMemoryEstimate;
|
||||
}
|
||||
|
||||
// mock callWithRequest
|
||||
// used in three places:
|
||||
// - to retrieve the info endpoint
|
||||
// - to search for cardinality of split field
|
||||
// - to retrieve field capabilities used in search for split field cardinality
|
||||
function callWithRequest(call) {
|
||||
if (typeof call === undefined) {
|
||||
return Promise.reject();
|
||||
}
|
||||
const getMockCallWithRequest = ({
|
||||
'ml.estimateModelMemory': estimateModelMemory,
|
||||
}: MockAPICallResponse = {}) =>
|
||||
((call: string) => {
|
||||
if (typeof call === undefined) {
|
||||
return Promise.reject();
|
||||
}
|
||||
|
||||
let response = {};
|
||||
if (call === 'ml.info') {
|
||||
response = mlInfoResponse;
|
||||
} else if (call === 'search') {
|
||||
response = cardinalitySearchResponse;
|
||||
} else if (call === 'fieldCaps') {
|
||||
response = fieldCapsResponse;
|
||||
}
|
||||
return Promise.resolve(response);
|
||||
}
|
||||
let response = {};
|
||||
if (call === 'ml.info') {
|
||||
response = mlInfoResponse;
|
||||
} else if (call === 'search') {
|
||||
response = cardinalitySearchResponse;
|
||||
} else if (call === 'fieldCaps') {
|
||||
response = fieldCapsResponse;
|
||||
} else if (call === 'ml.estimateModelMemory') {
|
||||
response = estimateModelMemory || modelMemoryEstimateResponse;
|
||||
}
|
||||
return Promise.resolve(response);
|
||||
}) as APICaller;
|
||||
|
||||
function getJobConfig(influencers = [], detectors = []) {
|
||||
return {
|
||||
function getJobConfig(influencers: string[] = [], detectors: Detector[] = []) {
|
||||
return ({
|
||||
analysis_config: { detectors, influencers },
|
||||
data_description: { time_field: '@timestamp' },
|
||||
datafeed_config: {
|
||||
|
@ -92,11 +108,11 @@ describe('ML - validateModelMemoryLimit', () => {
|
|||
analysis_limits: {
|
||||
model_memory_limit: '20mb',
|
||||
},
|
||||
};
|
||||
} as unknown) as CombinedJob;
|
||||
}
|
||||
|
||||
// create a specified number of mock detectors
|
||||
function createDetectors(numberOfDetectors) {
|
||||
function createDetectors(numberOfDetectors: number): Detector[] {
|
||||
const dtrs = [];
|
||||
for (let i = 0; i < numberOfDetectors; i++) {
|
||||
dtrs.push({
|
||||
|
@ -105,28 +121,28 @@ describe('ML - validateModelMemoryLimit', () => {
|
|||
partition_field_name: 'instance',
|
||||
});
|
||||
}
|
||||
return dtrs;
|
||||
return dtrs as Detector[];
|
||||
}
|
||||
|
||||
// tests
|
||||
it('Called with no duration or split and mml within limit', () => {
|
||||
const job = getJobConfig();
|
||||
const duration = undefined;
|
||||
|
||||
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
|
||||
return validateModelMemoryLimit(getMockCallWithRequest(), job, duration).then(messages => {
|
||||
const ids = messages.map(m => m.id);
|
||||
expect(ids).to.eql([]);
|
||||
expect(ids).toEqual([]);
|
||||
});
|
||||
});
|
||||
|
||||
it('Called with no duration or split and mml above limit', () => {
|
||||
const job = getJobConfig();
|
||||
const duration = undefined;
|
||||
// @ts-ignore
|
||||
job.analysis_limits.model_memory_limit = '31mb';
|
||||
|
||||
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
|
||||
return validateModelMemoryLimit(getMockCallWithRequest(), job, duration).then(messages => {
|
||||
const ids = messages.map(m => m.id);
|
||||
expect(ids).to.eql(['mml_greater_than_max_mml']);
|
||||
expect(ids).toEqual(['mml_greater_than_max_mml']);
|
||||
});
|
||||
});
|
||||
|
||||
|
@ -134,11 +150,16 @@ describe('ML - validateModelMemoryLimit', () => {
|
|||
const dtrs = createDetectors(10);
|
||||
const job = getJobConfig(['instance'], dtrs);
|
||||
const duration = { start: 0, end: 1 };
|
||||
// @ts-ignore
|
||||
job.analysis_limits.model_memory_limit = '20mb';
|
||||
|
||||
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
|
||||
return validateModelMemoryLimit(
|
||||
getMockCallWithRequest({ 'ml.estimateModelMemory': { model_memory_estimate: '66mb' } }),
|
||||
job,
|
||||
duration
|
||||
).then(messages => {
|
||||
const ids = messages.map(m => m.id);
|
||||
expect(ids).to.eql(['estimated_mml_greater_than_max_mml']);
|
||||
expect(ids).toEqual(['estimated_mml_greater_than_max_mml']);
|
||||
});
|
||||
});
|
||||
|
||||
|
@ -146,11 +167,16 @@ describe('ML - validateModelMemoryLimit', () => {
|
|||
const dtrs = createDetectors(2);
|
||||
const job = getJobConfig(['instance'], dtrs);
|
||||
const duration = { start: 0, end: 1 };
|
||||
// @ts-ignore
|
||||
job.analysis_limits.model_memory_limit = '30mb';
|
||||
|
||||
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
|
||||
return validateModelMemoryLimit(
|
||||
getMockCallWithRequest({ 'ml.estimateModelMemory': { model_memory_estimate: '24mb' } }),
|
||||
job,
|
||||
duration
|
||||
).then(messages => {
|
||||
const ids = messages.map(m => m.id);
|
||||
expect(ids).to.eql(['success_mml']);
|
||||
expect(ids).toEqual(['success_mml']);
|
||||
});
|
||||
});
|
||||
|
||||
|
@ -158,11 +184,16 @@ describe('ML - validateModelMemoryLimit', () => {
|
|||
const dtrs = createDetectors(2);
|
||||
const job = getJobConfig(['instance'], dtrs);
|
||||
const duration = { start: 0, end: 1 };
|
||||
// @ts-ignore
|
||||
job.analysis_limits.model_memory_limit = '10mb';
|
||||
|
||||
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
|
||||
return validateModelMemoryLimit(
|
||||
getMockCallWithRequest({ 'ml.estimateModelMemory': { model_memory_estimate: '22mb' } }),
|
||||
job,
|
||||
duration
|
||||
).then(messages => {
|
||||
const ids = messages.map(m => m.id);
|
||||
expect(ids).to.eql(['half_estimated_mml_greater_than_mml']);
|
||||
expect(ids).toEqual(['half_estimated_mml_greater_than_mml']);
|
||||
});
|
||||
});
|
||||
|
||||
|
@ -171,11 +202,12 @@ describe('ML - validateModelMemoryLimit', () => {
|
|||
const job = getJobConfig(['instance'], dtrs);
|
||||
const duration = { start: 0, end: 1 };
|
||||
delete mlInfoResponse.limits.max_model_memory_limit;
|
||||
// @ts-ignore
|
||||
job.analysis_limits.model_memory_limit = '10mb';
|
||||
|
||||
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
|
||||
return validateModelMemoryLimit(getMockCallWithRequest(), job, duration).then(messages => {
|
||||
const ids = messages.map(m => m.id);
|
||||
expect(ids).to.eql(['half_estimated_mml_greater_than_mml']);
|
||||
expect(ids).toEqual(['half_estimated_mml_greater_than_mml']);
|
||||
});
|
||||
});
|
||||
|
||||
|
@ -183,11 +215,16 @@ describe('ML - validateModelMemoryLimit', () => {
|
|||
const dtrs = createDetectors(1);
|
||||
const job = getJobConfig(['instance'], dtrs);
|
||||
const duration = { start: 0, end: 1 };
|
||||
// @ts-ignore
|
||||
job.analysis_limits.model_memory_limit = '20mb';
|
||||
|
||||
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
|
||||
return validateModelMemoryLimit(
|
||||
getMockCallWithRequest({ 'ml.estimateModelMemory': { model_memory_estimate: '19mb' } }),
|
||||
job,
|
||||
duration
|
||||
).then(messages => {
|
||||
const ids = messages.map(m => m.id);
|
||||
expect(ids).to.eql(['success_mml']);
|
||||
expect(ids).toEqual(['success_mml']);
|
||||
});
|
||||
});
|
||||
|
||||
|
@ -195,11 +232,12 @@ describe('ML - validateModelMemoryLimit', () => {
|
|||
const dtrs = createDetectors(1);
|
||||
const job = getJobConfig(['instance'], dtrs);
|
||||
const duration = { start: 0, end: 1 };
|
||||
// @ts-ignore
|
||||
job.analysis_limits.model_memory_limit = '0mb';
|
||||
|
||||
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
|
||||
return validateModelMemoryLimit(getMockCallWithRequest(), job, duration).then(messages => {
|
||||
const ids = messages.map(m => m.id);
|
||||
expect(ids).to.eql(['mml_value_invalid']);
|
||||
expect(ids).toEqual(['mml_value_invalid']);
|
||||
});
|
||||
});
|
||||
|
||||
|
@ -207,11 +245,12 @@ describe('ML - validateModelMemoryLimit', () => {
|
|||
const dtrs = createDetectors(1);
|
||||
const job = getJobConfig(['instance'], dtrs);
|
||||
const duration = { start: 0, end: 1 };
|
||||
// @ts-ignore
|
||||
job.analysis_limits.model_memory_limit = '10mbananas';
|
||||
|
||||
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
|
||||
return validateModelMemoryLimit(getMockCallWithRequest(), job, duration).then(messages => {
|
||||
const ids = messages.map(m => m.id);
|
||||
expect(ids).to.eql(['mml_value_invalid']);
|
||||
expect(ids).toEqual(['mml_value_invalid']);
|
||||
});
|
||||
});
|
||||
|
||||
|
@ -219,11 +258,12 @@ describe('ML - validateModelMemoryLimit', () => {
|
|||
const dtrs = createDetectors(1);
|
||||
const job = getJobConfig(['instance'], dtrs);
|
||||
const duration = { start: 0, end: 1 };
|
||||
// @ts-ignore
|
||||
job.analysis_limits.model_memory_limit = '10';
|
||||
|
||||
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
|
||||
return validateModelMemoryLimit(getMockCallWithRequest(), job, duration).then(messages => {
|
||||
const ids = messages.map(m => m.id);
|
||||
expect(ids).to.eql(['mml_value_invalid']);
|
||||
expect(ids).toEqual(['mml_value_invalid']);
|
||||
});
|
||||
});
|
||||
|
||||
|
@ -231,11 +271,12 @@ describe('ML - validateModelMemoryLimit', () => {
|
|||
const dtrs = createDetectors(1);
|
||||
const job = getJobConfig(['instance'], dtrs);
|
||||
const duration = { start: 0, end: 1 };
|
||||
// @ts-ignore
|
||||
job.analysis_limits.model_memory_limit = 'mb';
|
||||
|
||||
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
|
||||
return validateModelMemoryLimit(getMockCallWithRequest(), job, duration).then(messages => {
|
||||
const ids = messages.map(m => m.id);
|
||||
expect(ids).to.eql(['mml_value_invalid']);
|
||||
expect(ids).toEqual(['mml_value_invalid']);
|
||||
});
|
||||
});
|
||||
|
||||
|
@ -243,11 +284,12 @@ describe('ML - validateModelMemoryLimit', () => {
|
|||
const dtrs = createDetectors(1);
|
||||
const job = getJobConfig(['instance'], dtrs);
|
||||
const duration = { start: 0, end: 1 };
|
||||
// @ts-ignore
|
||||
job.analysis_limits.model_memory_limit = 'asdf';
|
||||
|
||||
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
|
||||
return validateModelMemoryLimit(getMockCallWithRequest(), job, duration).then(messages => {
|
||||
const ids = messages.map(m => m.id);
|
||||
expect(ids).to.eql(['mml_value_invalid']);
|
||||
expect(ids).toEqual(['mml_value_invalid']);
|
||||
});
|
||||
});
|
||||
|
||||
|
@ -255,11 +297,12 @@ describe('ML - validateModelMemoryLimit', () => {
|
|||
const dtrs = createDetectors(1);
|
||||
const job = getJobConfig(['instance'], dtrs);
|
||||
const duration = { start: 0, end: 1 };
|
||||
// @ts-ignore
|
||||
job.analysis_limits.model_memory_limit = '1023KB';
|
||||
|
||||
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
|
||||
return validateModelMemoryLimit(getMockCallWithRequest(), job, duration).then(messages => {
|
||||
const ids = messages.map(m => m.id);
|
||||
expect(ids).to.eql(['mml_value_invalid']);
|
||||
expect(ids).toEqual(['mml_value_invalid']);
|
||||
});
|
||||
});
|
||||
|
||||
|
@ -267,11 +310,12 @@ describe('ML - validateModelMemoryLimit', () => {
|
|||
const dtrs = createDetectors(1);
|
||||
const job = getJobConfig(['instance'], dtrs);
|
||||
const duration = { start: 0, end: 1 };
|
||||
// @ts-ignore
|
||||
job.analysis_limits.model_memory_limit = '1024KB';
|
||||
|
||||
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
|
||||
return validateModelMemoryLimit(getMockCallWithRequest(), job, duration).then(messages => {
|
||||
const ids = messages.map(m => m.id);
|
||||
expect(ids).to.eql(['half_estimated_mml_greater_than_mml']);
|
||||
expect(ids).toEqual(['half_estimated_mml_greater_than_mml']);
|
||||
});
|
||||
});
|
||||
|
||||
|
@ -279,11 +323,12 @@ describe('ML - validateModelMemoryLimit', () => {
|
|||
const dtrs = createDetectors(1);
|
||||
const job = getJobConfig(['instance'], dtrs);
|
||||
const duration = { start: 0, end: 1 };
|
||||
// @ts-ignore
|
||||
job.analysis_limits.model_memory_limit = '6MB';
|
||||
|
||||
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
|
||||
return validateModelMemoryLimit(getMockCallWithRequest(), job, duration).then(messages => {
|
||||
const ids = messages.map(m => m.id);
|
||||
expect(ids).to.eql(['half_estimated_mml_greater_than_mml']);
|
||||
expect(ids).toEqual(['half_estimated_mml_greater_than_mml']);
|
||||
});
|
||||
});
|
||||
|
||||
|
@ -291,11 +336,16 @@ describe('ML - validateModelMemoryLimit', () => {
|
|||
const dtrs = createDetectors(1);
|
||||
const job = getJobConfig(['instance'], dtrs);
|
||||
const duration = { start: 0, end: 1 };
|
||||
// @ts-ignore
|
||||
job.analysis_limits.model_memory_limit = '20MB';
|
||||
|
||||
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
|
||||
return validateModelMemoryLimit(
|
||||
getMockCallWithRequest({ 'ml.estimateModelMemory': { model_memory_estimate: '20mb' } }),
|
||||
job,
|
||||
duration
|
||||
).then(messages => {
|
||||
const ids = messages.map(m => m.id);
|
||||
expect(ids).to.eql(['success_mml']);
|
||||
expect(ids).toEqual(['success_mml']);
|
||||
});
|
||||
});
|
||||
});
|
|
@ -0,0 +1,135 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */

import numeral from '@elastic/numeral';
import { APICaller } from 'kibana/server';
import { CombinedJob } from '../../../common/types/anomaly_detection_jobs';
import { validateJobObject } from './validate_job_object';
import { calculateModelMemoryLimitProvider } from '../calculate_model_memory_limit';
import { ALLOWED_DATA_UNITS } from '../../../common/constants/validation';

// The minimum value the backend expects is 1MByte
const MODEL_MEMORY_LIMIT_MINIMUM_BYTES = 1048576;

export async function validateModelMemoryLimit(
  callWithRequest: APICaller,
  job: CombinedJob,
  duration?: { start?: number; end?: number }
) {
  validateJobObject(job);

  // retrieve the model memory limit specified by the user in the job config.
  // note, this will probably be the auto generated value, unless the user has
  // over written it.
  const mml = job?.analysis_limits?.model_memory_limit?.toUpperCase() ?? null;

  const messages = [];

  // check that mml is a valid data format
  if (mml !== null) {
    const mmlSplit = mml.match(/\d+(\w+)/);
    const unit = mmlSplit && mmlSplit.length === 2 ? mmlSplit[1] : null;

    if (unit === null || !ALLOWED_DATA_UNITS.includes(unit)) {
      messages.push({
        id: 'mml_value_invalid',
        mml,
      });
      // mml is not a valid data format.
      // abort all other tests
      return messages;
    }
  }

  // if there is no duration, do not run the estimate test
  const runCalcModelMemoryTest =
    duration && typeof duration?.start !== undefined && duration?.end !== undefined;

  // retrieve the max_model_memory_limit value from the server
  // this will be unset unless the user has set this on their cluster
  const maxModelMemoryLimit: string | undefined = (
    await callWithRequest('ml.info')
  )?.limits?.max_model_memory_limit?.toUpperCase();

  if (runCalcModelMemoryTest) {
    const { modelMemoryLimit } = await calculateModelMemoryLimitProvider(callWithRequest)(
      job.analysis_config,
      job.datafeed_config.indices.join(','),
      job.datafeed_config.query,
      job.data_description.time_field,
      duration!.start as number,
      duration!.end as number,
      true
    );
    // @ts-ignore
    const mmlEstimateBytes: number = numeral(modelMemoryLimit).value();

    let runEstimateGreaterThenMml = true;
    // if max_model_memory_limit has been set,
    // make sure the estimated value is not greater than it.
    if (typeof maxModelMemoryLimit !== 'undefined') {
      // @ts-ignore
      const maxMmlBytes: number = numeral(maxModelMemoryLimit).value();
      if (mmlEstimateBytes > maxMmlBytes) {
        runEstimateGreaterThenMml = false;
        messages.push({
          id: 'estimated_mml_greater_than_max_mml',
          maxModelMemoryLimit,
          modelMemoryLimit,
        });
      }
    }

    // check to see if the estimated mml is greater that the user
    // specified mml
    // do not run this if we've already found that it's larger than
    // the max mml
    if (runEstimateGreaterThenMml && mml !== null) {
      // @ts-ignore
      const mmlBytes: number = numeral(mml).value();
      if (mmlBytes < MODEL_MEMORY_LIMIT_MINIMUM_BYTES) {
        messages.push({
          id: 'mml_value_invalid',
          mml,
        });
      } else if (mmlEstimateBytes / 2 > mmlBytes) {
        messages.push({
          id: 'half_estimated_mml_greater_than_mml',
          maxModelMemoryLimit,
          mml,
        });
      } else if (mmlEstimateBytes > mmlBytes) {
        messages.push({
          id: 'estimated_mml_greater_than_mml',
          maxModelMemoryLimit,
          mml,
        });
      }
    }
  }

  // if max_model_memory_limit has been set,
  // make sure the user defined MML is not greater than it
  if (maxModelMemoryLimit !== undefined && mml !== null) {
    // @ts-ignore
    const maxMmlBytes = numeral(maxModelMemoryLimit).value();
    // @ts-ignore
    const mmlBytes = numeral(mml).value();
    if (mmlBytes > maxMmlBytes) {
      messages.push({
        id: 'mml_greater_than_max_mml',
        maxModelMemoryLimit,
        mml,
      });
    }
  }

  if (messages.length === 0 && runCalcModelMemoryTest === true) {
    messages.push({ id: 'success_mml' });
  }

  return messages;
}
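A short sketch of how a caller might consume the validator above; `callWithRequest` and `job` are assumed to come from the job validation route, and the time range is hypothetical. The returned messages carry ids such as 'success_mml', 'mml_value_invalid', 'mml_greater_than_max_mml', 'estimated_mml_greater_than_mml', 'half_estimated_mml_greater_than_mml' and 'estimated_mml_greater_than_max_mml':

```ts
import { APICaller } from 'kibana/server';
import { CombinedJob } from '../../../common/types/anomaly_detection_jobs';
import { validateModelMemoryLimit } from './validate_model_memory_limit';

async function checkMml(callWithRequest: APICaller, job: CombinedJob) {
  const messages = await validateModelMemoryLimit(callWithRequest, job, {
    start: 1560297859000, // hypothetical datafeed start
    end: 1562975136000,   // hypothetical datafeed end
  });
  // e.g. ['success_mml'] when the configured mml passes all checks
  return messages.map(m => m.id);
}
```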
@ -148,7 +148,7 @@ export function jobRoutes({ router, mlLicense }: RouteInitialization) {
        params: schema.object({
          jobId: schema.string(),
        }),
        body: schema.object({ ...anomalyDetectionJobSchema }),
        body: schema.object(anomalyDetectionJobSchema),
      },
    },
    mlLicense.fullLicenseAPIGuard(async (context, request, response) => {

@ -7,6 +7,7 @@
import Boom from 'boom';
import { RequestHandlerContext } from 'kibana/server';
import { schema, TypeOf } from '@kbn/config-schema';
import { AnalysisConfig } from '../../common/types/anomaly_detection_jobs';
import { wrapError } from '../client/error_wrapper';
import { RouteInitialization } from '../types';
import {

@ -29,23 +30,12 @@ export function jobValidationRoutes({ router, mlLicense }: RouteInitialization,
    context: RequestHandlerContext,
    payload: CalculateModelMemoryLimitPayload
  ) {
    const {
      indexPattern,
      splitFieldName,
      query,
      fieldNames,
      influencerNames,
      timeFieldName,
      earliestMs,
      latestMs,
    } = payload;
    const { analysisConfig, indexPattern, query, timeFieldName, earliestMs, latestMs } = payload;

    return calculateModelMemoryLimitProvider(context.ml!.mlClient.callAsCurrentUser)(
      analysisConfig as AnalysisConfig,
      indexPattern,
      splitFieldName,
      query,
      fieldNames,
      influencerNames,
      timeFieldName,
      earliestMs,
      latestMs

@ -102,7 +92,7 @@ export function jobValidationRoutes({ router, mlLicense }: RouteInitialization,
   *
   * @api {post} /api/ml/validate/calculate_model_memory_limit Calculates model memory limit
   * @apiName CalculateModelMemoryLimit
   * @apiDescription Calculates the model memory limit
   * @apiDescription Calls _estimate_model_memory endpoint to retrieve model memory estimation.
   *
   * @apiSuccess {String} modelMemoryLimit
   */

@ -63,14 +63,16 @@ export const anomalyDetectionUpdateJobSchema = {
  groups: schema.maybe(schema.arrayOf(schema.maybe(schema.string()))),
};

export const analysisConfigSchema = schema.object({
  bucket_span: schema.maybe(schema.string()),
  summary_count_field_name: schema.maybe(schema.string()),
  detectors: schema.arrayOf(detectorSchema),
  influencers: schema.arrayOf(schema.maybe(schema.string())),
  categorization_field_name: schema.maybe(schema.string()),
});

export const anomalyDetectionJobSchema = {
  analysis_config: schema.object({
    bucket_span: schema.maybe(schema.string()),
    summary_count_field_name: schema.maybe(schema.string()),
    detectors: schema.arrayOf(detectorSchema),
    influencers: schema.arrayOf(schema.maybe(schema.string())),
    categorization_field_name: schema.maybe(schema.string()),
  }),
  analysis_config: analysisConfigSchema,
  analysis_limits: schema.maybe(
    schema.object({
      categorization_examples_limit: schema.maybe(schema.number()),

@@ -5,7 +5,7 @@
 */

import { schema } from '@kbn/config-schema';
import { anomalyDetectionJobSchema } from './anomaly_detectors_schema';
import { analysisConfigSchema, anomalyDetectionJobSchema } from './anomaly_detectors_schema';
import { datafeedConfigSchema } from './datafeeds_schema';

export const estimateBucketSpanSchema = schema.object({
@@ -20,11 +20,9 @@ export const estimateBucketSpanSchema = schema.object({
});

export const modelMemoryLimitSchema = schema.object({
  analysisConfig: analysisConfigSchema,
  indexPattern: schema.string(),
  splitFieldName: schema.string(),
  query: schema.any(),
  fieldNames: schema.arrayOf(schema.string()),
  influencerNames: schema.arrayOf(schema.maybe(schema.string())),
  timeFieldName: schema.string(),
  earliestMs: schema.number(),
  latestMs: schema.number(),
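Since the request schema now nests analysisConfigSchema and drops the flat splitFieldName, fieldNames and influencerNames properties, payloads built for the old shape should fail validation. A minimal sketch of that check is below; it assumes the default behavior of @kbn/config-schema object types (unknown keys are rejected and required keys must be present), and the relative import path is illustrative.

// Illustrative unit-test style check; the import path is hypothetical.
import { modelMemoryLimitSchema } from './job_validation_schema';

// A legacy-style payload that still sends the removed flat fields and no analysisConfig.
const legacyPayload = {
  indexPattern: 'ecommerce',
  splitFieldName: 'geoip.city_name',
  fieldNames: ['products.base_price'],
  influencerNames: ['geoip.city_name'],
  query: { bool: { must: [{ match_all: {} }], filter: [], must_not: [] } },
  timeFieldName: 'order_date',
  earliestMs: 1560297859000,
  latestMs: 1562975136000,
};

try {
  // Assuming the default @kbn/config-schema behavior, this throws: analysisConfig is
  // required and the flat splitFieldName/fieldNames/influencerNames keys are unknown.
  modelMemoryLimitSchema.validate(legacyPayload);
} catch (e) {
  console.log('legacy payload rejected:', (e as Error).message);
}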
@@ -21,14 +21,20 @@ export default ({ getService }: FtrProviderContext) => {

  const testDataList = [
    {
      testTitleSuffix: 'with 0 metrics, 0 influencers and no split field',
      testTitleSuffix: 'when no partition field is provided with regular function',
      user: USER.ML_POWERUSER,
      requestBody: {
        indexPattern: 'ecommerce',
        splitFieldName: '',
        analysisConfig: {
          bucket_span: '15m',
          detectors: [
            {
              function: 'mean',
            },
          ],
          influencers: [],
        },
        query: { bool: { must: [{ match_all: {} }], filter: [], must_not: [] } },
        fieldNames: ['__ml_event_rate_count__'],
        influencerNames: [],
        timeFieldName: 'order_date',
        earliestMs: 1560297859000,
        latestMs: 1562975136000,
@@ -38,7 +44,8 @@ export default ({ getService }: FtrProviderContext) => {
        responseBody: {
          statusCode: 400,
          error: 'Bad Request',
          message: "[illegal_argument_exception] specified fields can't be null or empty",
          message:
            '[status_exception] Unless a count or temporal function is used one of field_name, by_field_name or over_field_name must be set',
        },
      },
    },
@@ -47,72 +54,79 @@ export default ({ getService }: FtrProviderContext) => {
      user: USER.ML_POWERUSER,
      requestBody: {
        indexPattern: 'ecommerce',
        splitFieldName: 'geoip.city_name',
        analysisConfig: {
          bucket_span: '15m',
          detectors: [
            {
              function: 'avg',
              field_name: 'geoip.city_name',
              by_field_name: 'geoip.city_name',
            },
          ],
          influencers: ['geoip.city_name'],
        },
        query: { bool: { must: [{ match_all: {} }], filter: [], must_not: [] } },
        fieldNames: ['products.base_price'],
        influencerNames: ['geoip.city_name'],
        timeFieldName: 'order_date',
        earliestMs: 1560297859000,
        latestMs: 1562975136000,
      },
      expected: {
        responseCode: 200,
        responseBody: { modelMemoryLimit: '12MB' },
        responseBody: { modelMemoryLimit: '11MB', estimatedModelMemoryLimit: '11MB' },
      },
    },
    {
      testTitleSuffix: 'with 3 metrics, 3 influencers, split by city',
      testTitleSuffix: 'with 3 influencers, split by city',
      user: USER.ML_POWERUSER,
      requestBody: {
        indexPattern: 'ecommerce',
        splitFieldName: 'geoip.city_name',
        analysisConfig: {
          bucket_span: '15m',
          detectors: [
            {
              function: 'mean',
              by_field_name: 'geoip.city_name',
              field_name: 'geoip.city_name',
            },
          ],
          influencers: ['geoip.city_name', 'customer_gender', 'customer_full_name.keyword'],
        },
        query: { bool: { must: [{ match_all: {} }], filter: [], must_not: [] } },
        fieldNames: ['products.base_price', 'taxful_total_price', 'products.discount_amount'],
        influencerNames: ['geoip.city_name', 'customer_gender', 'customer_full_name.keyword'],
        timeFieldName: 'order_date',
        earliestMs: 1560297859000,
        latestMs: 1562975136000,
      },
      expected: {
        responseCode: 200,
        responseBody: { modelMemoryLimit: '14MB' },
        responseBody: { estimatedModelMemoryLimit: '11MB', modelMemoryLimit: '11MB' },
      },
    },
    {
      testTitleSuffix: 'with 4 metrics, 4 influencers, split by customer_id',
      testTitleSuffix: '4 influencers, split by customer_id and filtering by country code',
      user: USER.ML_POWERUSER,
      requestBody: {
        indexPattern: 'ecommerce',
        splitFieldName: 'customer_id',
        query: { bool: { must: [{ match_all: {} }], filter: [], must_not: [] } },
        fieldNames: [
          'geoip.country_iso_code',
          'taxless_total_price',
          'taxful_total_price',
          'products.discount_amount',
        ],
        influencerNames: [
          'customer_id',
          'geoip.country_iso_code',
          'products.discount_percentage',
          'products.discount_amount',
        ],
        timeFieldName: 'order_date',
        earliestMs: 1560297859000,
        latestMs: 1562975136000,
      },
      expected: {
        responseCode: 200,
        responseBody: { modelMemoryLimit: '23MB' },
      },
    },
    {
      testTitleSuffix:
        'with 4 metrics, 4 influencers, split by customer_id and filtering by country code',
      user: USER.ML_POWERUSER,
      requestBody: {
        indexPattern: 'ecommerce',
        splitFieldName: 'customer_id',
        analysisConfig: {
          bucket_span: '2d',
          detectors: [
            {
              function: 'mean',
              by_field_name: 'customer_id.city_name',
              field_name: 'customer_id.city_name',
            },
            {
              function: 'avg',
              by_field_name: 'manufacturer.keyword',
              field_name: 'manufacturer.keyword',
            },
          ],
          influencers: [
            'geoip.country_iso_code',
            'products.discount_percentage',
            'products.discount_amount',
            'day_of_week',
          ],
        },
        query: {
          bool: {
            filter: {
@@ -122,25 +136,13 @@ export default ({ getService }: FtrProviderContext) => {
            },
          },
        },
        fieldNames: [
          'geoip.country_iso_code',
          'taxless_total_price',
          'taxful_total_price',
          'products.discount_amount',
        ],
        influencerNames: [
          'customer_id',
          'geoip.country_iso_code',
          'products.discount_percentage',
          'products.discount_amount',
        ],
        timeFieldName: 'order_date',
        earliestMs: 1560297859000,
        latestMs: 1562975136000,
      },
      expected: {
        responseCode: 200,
        responseBody: { modelMemoryLimit: '14MB' },
        responseBody: { estimatedModelMemoryLimit: '12MB', modelMemoryLimit: '12MB' },
      },
    },
  ];
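The expected responses above return both estimatedModelMemoryLimit, the raw value coming back from the _estimate_model_memory endpoint, and modelMemoryLimit, the value actually suggested for the job. A plausible reading of how the two relate when a cluster-wide max_model_memory_limit is configured is sketched below; the capping rule and the helper names are inferred from the commit message and the test data, not quoted from the source.

// Sketch only: derive the suggested MML from the raw estimate, honouring an optional cluster cap.
declare function toBytes(mml: string): number; // same hypothetical helper as in the earlier sketch

function suggestModelMemoryLimit(
  estimatedModelMemoryLimit: string,
  maxModelMemoryLimit?: string
): { estimatedModelMemoryLimit: string; modelMemoryLimit: string } {
  const capped =
    maxModelMemoryLimit !== undefined &&
    toBytes(estimatedModelMemoryLimit) > toBytes(maxModelMemoryLimit);

  return {
    estimatedModelMemoryLimit,
    // Never suggest more than the cluster allows.
    modelMemoryLimit: capped ? maxModelMemoryLimit! : estimatedModelMemoryLimit,
  };
}

// With no cap both values match, as in the test expectations:
// suggestModelMemoryLimit('11MB') -> { estimatedModelMemoryLimit: '11MB', modelMemoryLimit: '11MB' }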