[ML] Better model memory limit validation (#21270)

2025-04-23 17:28:26 -04:00 · 2018-07-27 07:49:17 +01:00 · 2018-07-27 07:49:17 +01:00 · 60706bf77a
commit 60706bf77a
parent b328bd0e3d
8 changed files with 189 additions and 66 deletions
--- a/x-pack/plugins/ml/common/constants/validation.js
+++ b/x-pack/plugins/ml/common/constants/validation.js
@ -15,3 +15,5 @@ export const VALIDATION_STATUS = {
 };

 export const SKIP_BUCKET_SPAN_ESTIMATION = true;
+
+export const ALLOWED_DATA_UNITS = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']; // upper-case unit suffixes accepted for model_memory_limit
--- a/x-pack/plugins/ml/common/util/job_utils.js
+++ b/x-pack/plugins/ml/common/util/job_utils.js
@ -10,6 +10,7 @@ import _ from 'lodash';
 import semver from 'semver';
 import numeral from '@elastic/numeral';

+import { ALLOWED_DATA_UNITS } from '../constants/validation';
 import { parseInterval } from './parse_interval';

 // work out the default frequency based on the bucket_span in seconds
@ -235,7 +236,7 @@ export function uniqWithIsEqual(arr) {
 // check job without manipulating UI and return a list of messages
 // job and fields get passed as arguments and are not accessed as $scope.* via the outer scope
 // because the plan is to move this function to the common code area so that it can be used on the server side too.
-export function basicJobValidation(job, fields, limits) {
+export function basicJobValidation(job, fields, limits, skipMmlChecks = false) {
  const messages = [];
  let valid = true;

@ -368,14 +369,28 @@ export function basicJobValidation(job, fields, limits) {
      }
    }

-    // model memory limit
-    const {
-      messages: mmlMessages,
-      valid: mmlValid,
-    } = validateModelMemoryLimit(job, limits);
+    if (skipMmlChecks === false) {
+      // model memory limit
+      const {
+        messages: mmlUnitMessages,
+        valid: mmlUnitValid,
+      } = validateModelMemoryLimitUnits(job);

-    messages.push(...mmlMessages);
-    valid = (valid && mmlValid);
+      messages.push(...mmlUnitMessages);
+      valid = (valid && mmlUnitValid);
+
+      if (mmlUnitValid) {
+        // if mml is a valid format,
+        // run the validation against max mml
+        const {
+          messages: mmlMessages,
+          valid: mmlValid,
+        } = validateModelMemoryLimit(job, limits);
+
+        messages.push(...mmlMessages);
+        valid = (valid && mmlValid);
+      }
+    }

  } else {
    valid = false;
@ -417,6 +432,30 @@ export function validateModelMemoryLimit(job, limits) {
  };
 }

+// Checks that model_memory_limit is a number followed by one of ALLOWED_DATA_UNITS.
+// Returns { valid, messages, contains(id), find(id) }; messages is empty when no limit is set.
+export function validateModelMemoryLimitUnits(job) {
+  const messages = [];
+  let valid = true;
+
+  if (typeof job.analysis_limits !== 'undefined' && typeof job.analysis_limits.model_memory_limit !== 'undefined') {
+    // String() so a non-string value (e.g. a bare number) is reported as invalid instead of throwing
+    const mml = String(job.analysis_limits.model_memory_limit).trim().toUpperCase();
+    // anchored at both ends so text around the value (e.g. "abc10MB" or "10MB!") is rejected
+    const mmlSplit = mml.match(/^\d+(?:\.\d+)?([A-Z]+)$/);
+    const unit = (mmlSplit !== null) ? mmlSplit[1] : null;
+
+    valid = (unit !== null && ALLOWED_DATA_UNITS.indexOf(unit) !== -1);
+    messages.push({ id: valid ? 'model_memory_limit_units_valid' : 'model_memory_limit_units_invalid' });
+  }
+  return {
+    valid,
+    messages,
+    contains: id =>  (messages.some(m => id === m.id)),
+    find: id => (messages.find(m => id === m.id)),
+  };
+}
+
 export function validateGroupNames(job) {
  const messages = [];
  let valid = true;
--- a/x-pack/plugins/ml/public/jobs/jobs_list/components/edit_job_flyout/validate_job.js
+++ b/x-pack/plugins/ml/public/jobs/jobs_list/components/edit_job_flyout/validate_job.js
@ -11,6 +11,7 @@ import { populateValidationMessages } from 'plugins/ml/jobs/new_job/simple/compo
 import {
  validateModelMemoryLimit as validateModelMemoryLimitUtils,
  validateGroupNames as validateGroupNamesUtils,
+  validateModelMemoryLimitUnits as validateModelMemoryLimitUnitsUtils,
 } from 'plugins/ml/../common/util/job_utils';

 export function validateModelMemoryLimit(mml) {
@ -20,8 +21,14 @@ export function validateModelMemoryLimit(mml) {
      model_memory_limit: mml
    }
  };
-  const validationResults = validateModelMemoryLimitUtils(tempJob, limits);
-  const { valid } = validationResults;
+
+  let validationResults = validateModelMemoryLimitUnitsUtils(tempJob);
+  let { valid } = validationResults;
+
+  if(valid) {
+    validationResults = validateModelMemoryLimitUtils(tempJob, limits);
+    valid = validationResults.valid;
+  }

  const modelMemoryLimit = {
    valid,
--- a/x-pack/plugins/ml/public/jobs/new_job/advanced/new_job_controller.js
+++ b/x-pack/plugins/ml/public/jobs/new_job/advanced/new_job_controller.js
@ -1026,6 +1026,12 @@ module.controller('MlNewJob',
          tabs[0].checks.groupIds.message = msg;
        }

+        if (validationResults.contains('model_memory_limit_units_invalid')) {
+          tabs[0].checks.modelMemoryLimit.valid = false;
+          const msg = `Model memory limit data unit unrecognized. It must be B, KB, MB, GB, TB or PB`;
+          tabs[0].checks.modelMemoryLimit.message = msg;
+        }
+
        if (validationResults.contains('model_memory_limit_invalid')) {
          tabs[0].checks.modelMemoryLimit.valid = false;
          const msg = `Model memory limit cannot be higher than the maximum value of ${limits.max_model_memory_limit.toUpperCase()}`;
--- a/x-pack/plugins/ml/public/jobs/new_job/simple/components/utils/validate_job.js
+++ b/x-pack/plugins/ml/public/jobs/new_job/simple/components/utils/validate_job.js
@ -50,6 +50,12 @@ export function populateValidationMessages(validationResults, checks) {
    checks.groupIds.message = msg;
  }

+  if (validationResults.contains('model_memory_limit_units_invalid')) {
+    checks.modelMemoryLimit.valid = false;
+    const msg = `Model memory limit data unit unrecognized. It must be B, KB, MB, GB, TB or PB`;
+    checks.modelMemoryLimit.message = msg;
+  }
+
  if (validationResults.contains('model_memory_limit_invalid')) {
    checks.modelMemoryLimit.valid = false;
    const msg = `Model memory limit cannot be higher than the maximum value of ${limits.max_model_memory_limit.toUpperCase()}`;
--- a/x-pack/plugins/ml/server/models/job_validation/tests/validate_model_memory_limit.js
+++ b/x-pack/plugins/ml/server/models/job_validation/tests/validate_model_memory_limit.js
@ -206,6 +206,48 @@ describe('ML - validateModelMemoryLimit', () => {
    );
  });

+  it('Called with specified invalid mml of "10mbananas"', () => {
+    // a number followed by an unrecognised unit suffix
+    const job = getJobConfig(['instance'], createDetectors(1));
+    job.analysis_limits.model_memory_limit = '10mbananas';
+    const timeRange = { start: 0, end: 1 };
+
+    return validateModelMemoryLimit(callWithRequest, job, timeRange).then(
+      (results) => {
+        const messageIds = results.map(({ id }) => id);
+        expect(messageIds).to.eql(['mml_value_invalid']);
+      }
+    );
+  });
+
+  it('Called with specified invalid mml of "10"', () => {
+    // a number with no unit suffix at all
+    const job = getJobConfig(['instance'], createDetectors(1));
+    job.analysis_limits.model_memory_limit = '10';
+    const timeRange = { start: 0, end: 1 };
+
+    return validateModelMemoryLimit(callWithRequest, job, timeRange).then(
+      (results) => {
+        const messageIds = results.map(({ id }) => id);
+        expect(messageIds).to.eql(['mml_value_invalid']);
+      }
+    );
+  });
+
+  it('Called with specified invalid mml of "mb"', () => {
+    // a unit suffix with no leading number
+    const job = getJobConfig(['instance'], createDetectors(1));
+    job.analysis_limits.model_memory_limit = 'mb';
+    const timeRange = { start: 0, end: 1 };
+
+    return validateModelMemoryLimit(callWithRequest, job, timeRange).then(
+      (results) => {
+        const messageIds = results.map(({ id }) => id);
+        expect(messageIds).to.eql(['mml_value_invalid']);
+      }
+    );
+  });
+
  it('Called with specified invalid mml of "asdf"', () => {
    const dtrs = createDetectors(1);
    const job = getJobConfig(['instance'], dtrs);
--- a/x-pack/plugins/ml/server/models/job_validation/job_validation.js
+++ b/x-pack/plugins/ml/server/models/job_validation/job_validation.js
@ -44,7 +44,7 @@ export async function validateJob(callWithRequest, payload, kbnVersion = 'curren
    // check if basic tests pass the requirements to run the extended tests.
    // if so, run the extended tests and merge the messages.
    // otherwise just return the basic test messages.
-    const basicValidation = basicJobValidation(job, fields, {});
+    const basicValidation = basicJobValidation(job, fields, {}, true);
    let validationMessages;

    if (basicValidation.valid === true) {
--- a/x-pack/plugins/ml/server/models/job_validation/validate_model_memory_limit.js
+++ b/x-pack/plugins/ml/server/models/job_validation/validate_model_memory_limit.js
@ -9,6 +9,7 @@
 import numeral from '@elastic/numeral';
 import { validateJobObject } from './validate_job_object';
 import { calculateModelMemoryLimitProvider } from '../../models/calculate_model_memory_limit';
+import { ALLOWED_DATA_UNITS } from '../../../common/constants/validation';

 export async function validateModelMemoryLimit(callWithRequest, job, duration) {
  validateJobObject(job);
@ -28,6 +29,7 @@ export async function validateModelMemoryLimit(callWithRequest, job, duration) {
  let splitFieldName = '';
  const fieldNames = [];
  let runCalcModelMemoryTest = true;
+  let validModelMemoryLimit = true;

  // extract the field names and partition field names from the detectors
  // we only want to estimate the mml for multi-metric jobs.
@ -62,66 +64,85 @@ export async function validateModelMemoryLimit(callWithRequest, job, duration) {
  }

  const messages = [];
-  if (runCalcModelMemoryTest) {
-    const mmlEstimate = await calculateModelMemoryLimitProvider(callWithRequest)(
-      job.datafeed_config.indices.join(','),
-      splitFieldName,
-      job.datafeed_config.query,
-      fieldNames,
-      job.analysis_config.influencers,
-      job.data_description.time_field,
-      duration.start,
-      duration.end,
-      true);
-    const mmlEstimateBytes = numeral(mmlEstimate.modelMemoryLimit).value();

-    let runEstimateGreaterThenMml = true;
-    // if max_model_memory_limit has been set,
-    // make sure the estimated value is not greater than it.
-    if (typeof maxModelMemoryLimit !== 'undefined') {
-      const maxMmlBytes = numeral(maxModelMemoryLimit.toUpperCase()).value();
-      if (mmlEstimateBytes > maxMmlBytes) {
-        runEstimateGreaterThenMml = false;
-        messages.push({
-          id: 'estimated_mml_greater_than_max_mml',
-          maxModelMemoryLimit,
-          mmlEstimate
-        });
-      }
-    }
+  // check that mml is a valid data format
+  if (mml !== null) {
+    const mmlSplit = mml.match(/\d+(\w+)/);
+    const unit = (mmlSplit && mmlSplit.length === 2) ? mmlSplit[1] : null;

-    // check to see if the estimated mml is greater that the user
-    // specified mml
-    // do not run this if we've already found that it's larger than
-    // the max mml
-    if (runEstimateGreaterThenMml && mml !== null) {
-      const mmlBytes = numeral(mml).value();
-      if (mmlBytes === 0) {
-        messages.push({
-          id: 'mml_value_invalid',
-          mml
-        });
-      } else if (mmlEstimateBytes > mmlBytes) {
-        messages.push({
-          id: 'estimated_mml_greater_than_mml',
-          maxModelMemoryLimit,
-          mml
-        });
-      }
+    if (ALLOWED_DATA_UNITS.indexOf(unit) === -1) {
+      messages.push({
+        id: 'mml_value_invalid',
+        mml
+      });
+      // mml is not a valid data format.
+      // abort all other tests
+      validModelMemoryLimit = false;
    }
  }

-  // if max_model_memory_limit has been set,
-  // make sure the user defined MML is not greater than it
-  if (maxModelMemoryLimit !== undefined && mml !== null) {
-    const maxMmlBytes = numeral(maxModelMemoryLimit.toUpperCase()).value();
-    const mmlBytes = numeral(mml).value();
-    if (mmlBytes > maxMmlBytes) {
-      messages.push({
-        id: 'mml_greater_than_max_mml',
-        maxModelMemoryLimit,
-        mml
-      });
+  if (validModelMemoryLimit) {
+    if(runCalcModelMemoryTest) {
+      const mmlEstimate = await calculateModelMemoryLimitProvider(callWithRequest)(
+        job.datafeed_config.indices.join(','),
+        splitFieldName,
+        job.datafeed_config.query,
+        fieldNames,
+        job.analysis_config.influencers,
+        job.data_description.time_field,
+        duration.start,
+        duration.end,
+        true);
+      const mmlEstimateBytes = numeral(mmlEstimate.modelMemoryLimit).value();
+
+      let runEstimateGreaterThenMml = true;
+      // if max_model_memory_limit has been set,
+      // make sure the estimated value is not greater than it.
+      if (typeof maxModelMemoryLimit !== 'undefined') {
+        const maxMmlBytes = numeral(maxModelMemoryLimit.toUpperCase()).value();
+        if (mmlEstimateBytes > maxMmlBytes) {
+          runEstimateGreaterThenMml = false;
+          messages.push({
+            id: 'estimated_mml_greater_than_max_mml',
+            maxModelMemoryLimit,
+            mmlEstimate
+          });
+        }
+      }
+
+      // check to see if the estimated mml is greater than the user
+      // specified mml
+      // do not run this if we've already found that it's larger than
+      // the max mml
+      if (runEstimateGreaterThenMml && mml !== null) {
+        const mmlBytes = numeral(mml).value();
+        if (mmlBytes === 0) {
+          messages.push({
+            id: 'mml_value_invalid',
+            mml
+          });
+        } else if (mmlEstimateBytes > mmlBytes) {
+          messages.push({
+            id: 'estimated_mml_greater_than_mml',
+            maxModelMemoryLimit,
+            mml
+          });
+        }
+      }
+    }
+
+    // if max_model_memory_limit has been set,
+    // make sure the user defined MML is not greater than it
+    if (maxModelMemoryLimit !== undefined && mml !== null) {
+      const maxMmlBytes = numeral(maxModelMemoryLimit.toUpperCase()).value();
+      const mmlBytes = numeral(mml).value();
+      if (mmlBytes > maxMmlBytes) {
+        messages.push({
+          id: 'mml_greater_than_max_mml',
+          maxModelMemoryLimit,
+          mml
+        });
+      }
    }
  }