[ML] Better model memory limit validation (#21270)

This commit is contained in:
James Gowdy 2018-07-27 07:49:17 +01:00 committed by GitHub
parent b328bd0e3d
commit 60706bf77a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 189 additions and 66 deletions

View file

@ -15,3 +15,5 @@ export const VALIDATION_STATUS = {
};
// NOTE(review): presumably tells validation callers to skip the bucket span
// estimation step; its consumers are not visible here — confirm.
export const SKIP_BUCKET_SPAN_ESTIMATION = true;
// Upper-case data-size unit suffixes that a model memory limit value may end in.
// Validation code compares the suffix extracted from the limit against this list.
export const ALLOWED_DATA_UNITS = ['B', 'KB', 'MB', 'GB', 'TB', 'PB'];

View file

@ -10,6 +10,7 @@ import _ from 'lodash';
import semver from 'semver';
import numeral from '@elastic/numeral';
import { ALLOWED_DATA_UNITS } from '../constants/validation';
import { parseInterval } from './parse_interval';
// work out the default frequency based on the bucket_span in seconds
@ -235,7 +236,7 @@ export function uniqWithIsEqual(arr) {
// check job without manipulating UI and return a list of messages
// job and fields get passed as arguments and are not accessed as $scope.* via the outer scope
// because the plan is to move this function to the common code area so that it can be used on the server side too.
export function basicJobValidation(job, fields, limits) {
export function basicJobValidation(job, fields, limits, skipMmlChecks = false) {
const messages = [];
let valid = true;
@ -368,14 +369,28 @@ export function basicJobValidation(job, fields, limits) {
}
}
// model memory limit
const {
messages: mmlMessages,
valid: mmlValid,
} = validateModelMemoryLimit(job, limits);
if (skipMmlChecks === false) {
// model memory limit
const {
messages: mmlUnitMessages,
valid: mmlUnitValid,
} = validateModelMemoryLimitUnits(job);
messages.push(...mmlMessages);
valid = (valid && mmlValid);
messages.push(...mmlUnitMessages);
valid = (valid && mmlUnitValid);
if (mmlUnitValid) {
// if mml is a valid format,
// run the validation against max mml
const {
messages: mmlMessages,
valid: mmlValid,
} = validateModelMemoryLimit(job, limits);
messages.push(...mmlMessages);
valid = (valid && mmlValid);
}
}
} else {
valid = false;
@ -417,6 +432,30 @@ export function validateModelMemoryLimit(job, limits) {
};
}
/**
 * Check that a job's model memory limit carries a recognised data unit.
 *
 * The limit is upper-cased and the word characters following a run of digits
 * are taken as its unit; that unit must be one of ALLOWED_DATA_UNITS.
 * A limit that is absent (undefined or null) is not checked at all: the result
 * is valid and carries no messages. Non-string limits are converted with
 * String() before the check, so a bare number is reported as having an
 * invalid unit instead of raising a TypeError.
 *
 * @param {Object} job - job configuration; only `analysis_limits.model_memory_limit` is read.
 * @returns {{valid: boolean, messages: Array<{id: string}>, contains: Function, find: Function}}
 *   `messages` holds either `model_memory_limit_units_invalid` or
 *   `model_memory_limit_units_valid` (or nothing when no limit is set);
 *   `contains(id)` and `find(id)` look a message up by its id.
 */
export function validateModelMemoryLimitUnits(job) {
  const messages = [];
  let valid = true;

  // `typeof null` is 'object', so a typeof-based "undefined" guard lets null
  // through and the property access / toUpperCase() below it would throw.
  // Treat null the same as undefined: no limit has been specified.
  const analysisLimits = job.analysis_limits;
  const hasLimits = analysisLimits !== undefined && analysisLimits !== null;
  const rawMml = hasLimits ? analysisLimits.model_memory_limit : undefined;

  if (rawMml !== undefined && rawMml !== null) {
    const mml = String(rawMml).toUpperCase();
    const mmlSplit = mml.match(/\d+(\w+)/);
    const unit = (mmlSplit && mmlSplit.length === 2) ? mmlSplit[1] : null;

    if (ALLOWED_DATA_UNITS.indexOf(unit) === -1) {
      messages.push({ id: 'model_memory_limit_units_invalid' });
      valid = false;
    } else {
      messages.push({ id: 'model_memory_limit_units_valid' });
    }
  }

  return {
    valid,
    messages,
    contains: id => (messages.some(m => id === m.id)),
    find: id => (messages.find(m => id === m.id)),
  };
}
export function validateGroupNames(job) {
const messages = [];
let valid = true;

View file

@ -11,6 +11,7 @@ import { populateValidationMessages } from 'plugins/ml/jobs/new_job/simple/compo
import {
validateModelMemoryLimit as validateModelMemoryLimitUtils,
validateGroupNames as validateGroupNamesUtils,
validateModelMemoryLimitUnits as validateModelMemoryLimitUnitsUtils,
} from 'plugins/ml/../common/util/job_utils';
export function validateModelMemoryLimit(mml) {
@ -20,8 +21,14 @@ export function validateModelMemoryLimit(mml) {
model_memory_limit: mml
}
};
const validationResults = validateModelMemoryLimitUtils(tempJob, limits);
const { valid } = validationResults;
let validationResults = validateModelMemoryLimitUnitsUtils(tempJob);
let { valid } = validationResults;
if(valid) {
validationResults = validateModelMemoryLimitUtils(tempJob, limits);
valid = validationResults.valid;
}
const modelMemoryLimit = {
valid,

View file

@ -1026,6 +1026,12 @@ module.controller('MlNewJob',
tabs[0].checks.groupIds.message = msg;
}
// An unrecognised model memory limit unit marks the check on the first tab
// as failed and attaches the explanatory message.
if (validationResults.contains('model_memory_limit_units_invalid')) {
  const mmlCheck = tabs[0].checks.modelMemoryLimit;
  mmlCheck.valid = false;
  mmlCheck.message = `Model memory limit data unit unrecognized. It must be B, KB, MB, GB, TB or PB`;
}
if (validationResults.contains('model_memory_limit_invalid')) {
tabs[0].checks.modelMemoryLimit.valid = false;
const msg = `Model memory limit cannot be higher than the maximum value of ${limits.max_model_memory_limit.toUpperCase()}`;

View file

@ -50,6 +50,12 @@ export function populateValidationMessages(validationResults, checks) {
checks.groupIds.message = msg;
}
// An unrecognised model memory limit unit marks the modelMemoryLimit check
// as failed and attaches the explanatory message.
if (validationResults.contains('model_memory_limit_units_invalid')) {
  const mmlCheck = checks.modelMemoryLimit;
  mmlCheck.valid = false;
  mmlCheck.message = `Model memory limit data unit unrecognized. It must be B, KB, MB, GB, TB or PB`;
}
if (validationResults.contains('model_memory_limit_invalid')) {
checks.modelMemoryLimit.valid = false;
const msg = `Model memory limit cannot be higher than the maximum value of ${limits.max_model_memory_limit.toUpperCase()}`;

View file

@ -206,6 +206,48 @@ describe('ML - validateModelMemoryLimit', () => {
);
});
// A limit whose suffix is not a recognised unit ("10mbananas") is expected to
// produce exactly one message: 'mml_value_invalid'.
it('Called with specified invalid mml of "10mbananas"', async () => {
  const detectors = createDetectors(1);
  const jobConfig = getJobConfig(['instance'], detectors);
  const timeRange = { start: 0, end: 1 };
  jobConfig.analysis_limits.model_memory_limit = '10mbananas';

  const results = await validateModelMemoryLimit(callWithRequest, jobConfig, timeRange);
  const messageIds = results.map(({ id }) => id);
  expect(messageIds).to.eql(['mml_value_invalid']);
});
// A limit made only of digits ("10", no unit) is expected to produce exactly
// one message: 'mml_value_invalid'.
it('Called with specified invalid mml of "10"', async () => {
  const detectors = createDetectors(1);
  const jobConfig = getJobConfig(['instance'], detectors);
  const timeRange = { start: 0, end: 1 };
  jobConfig.analysis_limits.model_memory_limit = '10';

  const results = await validateModelMemoryLimit(callWithRequest, jobConfig, timeRange);
  const messageIds = results.map(({ id }) => id);
  expect(messageIds).to.eql(['mml_value_invalid']);
});
// A limit made only of a unit ("mb", no number) is expected to produce exactly
// one message: 'mml_value_invalid'.
it('Called with specified invalid mml of "mb"', async () => {
  const detectors = createDetectors(1);
  const jobConfig = getJobConfig(['instance'], detectors);
  const timeRange = { start: 0, end: 1 };
  jobConfig.analysis_limits.model_memory_limit = 'mb';

  const results = await validateModelMemoryLimit(callWithRequest, jobConfig, timeRange);
  const messageIds = results.map(({ id }) => id);
  expect(messageIds).to.eql(['mml_value_invalid']);
});
it('Called with specified invalid mml of "asdf"', () => {
const dtrs = createDetectors(1);
const job = getJobConfig(['instance'], dtrs);

View file

@ -44,7 +44,7 @@ export async function validateJob(callWithRequest, payload, kbnVersion = 'curren
// check if basic tests pass the requirements to run the extended tests.
// if so, run the extended tests and merge the messages.
// otherwise just return the basic test messages.
const basicValidation = basicJobValidation(job, fields, {});
const basicValidation = basicJobValidation(job, fields, {}, true);
let validationMessages;
if (basicValidation.valid === true) {

View file

@ -9,6 +9,7 @@
import numeral from '@elastic/numeral';
import { validateJobObject } from './validate_job_object';
import { calculateModelMemoryLimitProvider } from '../../models/calculate_model_memory_limit';
import { ALLOWED_DATA_UNITS } from '../../../common/constants/validation';
export async function validateModelMemoryLimit(callWithRequest, job, duration) {
validateJobObject(job);
@ -28,6 +29,7 @@ export async function validateModelMemoryLimit(callWithRequest, job, duration) {
let splitFieldName = '';
const fieldNames = [];
let runCalcModelMemoryTest = true;
let validModelMemoryLimit = true;
// extract the field names and partition field names from the detectors
// we only want to estimate the mml for multi-metric jobs.
@ -62,66 +64,85 @@ export async function validateModelMemoryLimit(callWithRequest, job, duration) {
}
const messages = [];
if (runCalcModelMemoryTest) {
const mmlEstimate = await calculateModelMemoryLimitProvider(callWithRequest)(
job.datafeed_config.indices.join(','),
splitFieldName,
job.datafeed_config.query,
fieldNames,
job.analysis_config.influencers,
job.data_description.time_field,
duration.start,
duration.end,
true);
const mmlEstimateBytes = numeral(mmlEstimate.modelMemoryLimit).value();
let runEstimateGreaterThenMml = true;
// if max_model_memory_limit has been set,
// make sure the estimated value is not greater than it.
if (typeof maxModelMemoryLimit !== 'undefined') {
const maxMmlBytes = numeral(maxModelMemoryLimit.toUpperCase()).value();
if (mmlEstimateBytes > maxMmlBytes) {
runEstimateGreaterThenMml = false;
messages.push({
id: 'estimated_mml_greater_than_max_mml',
maxModelMemoryLimit,
mmlEstimate
});
}
}
// check that mml is a valid data format
if (mml !== null) {
const mmlSplit = mml.match(/\d+(\w+)/);
const unit = (mmlSplit && mmlSplit.length === 2) ? mmlSplit[1] : null;
// check to see if the estimated mml is greater than the
// user-specified mml.
// do not run this if we've already found that it's larger than
// the max mml
if (runEstimateGreaterThenMml && mml !== null) {
const mmlBytes = numeral(mml).value();
if (mmlBytes === 0) {
messages.push({
id: 'mml_value_invalid',
mml
});
} else if (mmlEstimateBytes > mmlBytes) {
messages.push({
id: 'estimated_mml_greater_than_mml',
maxModelMemoryLimit,
mml
});
}
if (ALLOWED_DATA_UNITS.indexOf(unit) === -1) {
messages.push({
id: 'mml_value_invalid',
mml
});
// mml is not a valid data format.
// abort all other tests
validModelMemoryLimit = false;
}
}
// if max_model_memory_limit has been set,
// make sure the user defined MML is not greater than it
if (maxModelMemoryLimit !== undefined && mml !== null) {
const maxMmlBytes = numeral(maxModelMemoryLimit.toUpperCase()).value();
const mmlBytes = numeral(mml).value();
if (mmlBytes > maxMmlBytes) {
messages.push({
id: 'mml_greater_than_max_mml',
maxModelMemoryLimit,
mml
});
if (validModelMemoryLimit) {
if(runCalcModelMemoryTest) {
const mmlEstimate = await calculateModelMemoryLimitProvider(callWithRequest)(
job.datafeed_config.indices.join(','),
splitFieldName,
job.datafeed_config.query,
fieldNames,
job.analysis_config.influencers,
job.data_description.time_field,
duration.start,
duration.end,
true);
const mmlEstimateBytes = numeral(mmlEstimate.modelMemoryLimit).value();
let runEstimateGreaterThenMml = true;
// if max_model_memory_limit has been set,
// make sure the estimated value is not greater than it.
if (typeof maxModelMemoryLimit !== 'undefined') {
const maxMmlBytes = numeral(maxModelMemoryLimit.toUpperCase()).value();
if (mmlEstimateBytes > maxMmlBytes) {
runEstimateGreaterThenMml = false;
messages.push({
id: 'estimated_mml_greater_than_max_mml',
maxModelMemoryLimit,
mmlEstimate
});
}
}
// check to see if the estimated mml is greater than the
// user-specified mml.
// do not run this if we've already found that it's larger than
// the max mml
if (runEstimateGreaterThenMml && mml !== null) {
const mmlBytes = numeral(mml).value();
if (mmlBytes === 0) {
messages.push({
id: 'mml_value_invalid',
mml
});
} else if (mmlEstimateBytes > mmlBytes) {
messages.push({
id: 'estimated_mml_greater_than_mml',
maxModelMemoryLimit,
mml
});
}
}
}
// if max_model_memory_limit has been set,
// make sure the user defined MML is not greater than it
if (maxModelMemoryLimit !== undefined && mml !== null) {
const maxMmlBytes = numeral(maxModelMemoryLimit.toUpperCase()).value();
const mmlBytes = numeral(mml).value();
if (mmlBytes > maxMmlBytes) {
messages.push({
id: 'mml_greater_than_max_mml',
maxModelMemoryLimit,
mml
});
}
}
}