[ML] Validate manual model memory input (#59056) (#59287)

* [ML] validate mml based on estimated value

* [ML] better memoize

* [ML] memoryInputValidator unit tests

* [ML] cache mml errors

* [ML] prevent override

* [ML] fix validators, add unit tests

* [ML] ignore typing issue with numeral

* [ML] fix validateMinMML

* [ML] fix useCreateAnalyticsForm test

* [ML] setEstimatedModelMemoryLimit to the fallback value in case of an error
This commit is contained in:
Dima Arnautov 2020-03-04 13:27:08 +01:00 committed by GitHub
parent 91babc7628
commit b89066baed
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 197 additions and 26 deletions

View file

@ -4,7 +4,7 @@
* you may not use this file except in compliance with the Elastic License.
*/
import { maxLengthValidator } from './validators';
import { maxLengthValidator, memoryInputValidator } from './validators';
describe('maxLengthValidator', () => {
test('should allow a valid input', () => {
@ -20,3 +20,29 @@ describe('maxLengthValidator', () => {
});
});
});
describe('memoryInputValidator', () => {
test('should detect missing units', () => {
expect(memoryInputValidator()('10')).toEqual({
invalidUnits: {
allowedUnits: 'B, KB, MB, GB, TB, PB',
},
});
});
test('should accept valid input', () => {
expect(memoryInputValidator()('100PB')).toEqual(null);
});
test('should accept valid input with custom allowed units', () => {
expect(memoryInputValidator(['B', 'KB'])('100KB')).toEqual(null);
});
test('should detect not allowed units', () => {
expect(memoryInputValidator(['B', 'KB'])('100MB')).toEqual({
invalidUnits: {
allowedUnits: 'B, KB',
},
});
});
});

View file

@ -4,6 +4,8 @@
* you may not use this file except in compliance with the Elastic License.
*/
import { ALLOWED_DATA_UNITS } from '../constants/validation';
/**
* Provides a validator function for maximum allowed input length.
* @param maxLength Maximum length allowed.
@ -44,8 +46,8 @@ export function patternValidator(
* @param validators
*/
export function composeValidators(
...validators: Array<(value: string) => { [key: string]: any } | null>
): (value: string) => { [key: string]: any } | null {
...validators: Array<(value: any) => { [key: string]: any } | null>
): (value: any) => { [key: string]: any } | null {
return value => {
const validationResult = validators.reduce((acc, validator) => {
return {
@ -56,3 +58,21 @@ export function composeValidators(
return Object.keys(validationResult).length > 0 ? validationResult : null;
};
}
export function requiredValidator() {
return (value: any) => {
return value === '' || value === undefined || value === null ? { required: true } : null;
};
}
export function memoryInputValidator(allowedUnits = ALLOWED_DATA_UNITS) {
return (value: any) => {
if (typeof value !== 'string' || value === '') {
return null;
}
const regexp = new RegExp(`\\d+(${allowedUnits.join('|')})$`, 'i');
return regexp.test(value.trim())
? null
: { invalidUnits: { allowedUnits: allowedUnits.join(', ') } };
};
}

View file

@ -4,7 +4,7 @@
* you may not use this file except in compliance with the Elastic License.
*/
import React, { Fragment, FC, useEffect } from 'react';
import React, { Fragment, FC, useEffect, useMemo } from 'react';
import {
EuiComboBox,
@ -36,7 +36,7 @@ import { JOB_ID_MAX_LENGTH } from '../../../../../../../common/constants/validat
import { Messages } from './messages';
import { JobType } from './job_type';
import { JobDescriptionInput } from './job_description';
import { mmlUnitInvalidErrorMessage } from '../../hooks/use_create_analytics_form/reducer';
import { getModelMemoryLimitErrors } from '../../hooks/use_create_analytics_form/reducer';
import {
IndexPattern,
indexPatterns,
@ -49,7 +49,7 @@ export const CreateAnalyticsForm: FC<CreateAnalyticsFormProps> = ({ actions, sta
services: { docLinks },
} = useMlKibana();
const { ELASTIC_WEBSITE_URL, DOC_LINK_VERSION } = docLinks;
const { setFormState } = actions;
const { setFormState, setEstimatedModelMemoryLimit } = actions;
const mlContext = useMlContext();
const { form, indexPatternsMap, isAdvancedEditorEnabled, isJobCreated, requestMessages } = state;
@ -77,7 +77,7 @@ export const CreateAnalyticsForm: FC<CreateAnalyticsFormProps> = ({ actions, sta
loadingFieldOptions,
maxDistinctValuesError,
modelMemoryLimit,
modelMemoryLimitUnitValid,
modelMemoryLimitValidationResult,
previousJobType,
previousSourceIndex,
sourceIndex,
@ -89,6 +89,10 @@ export const CreateAnalyticsForm: FC<CreateAnalyticsFormProps> = ({ actions, sta
} = form;
const characterList = indexPatterns.ILLEGAL_CHARACTERS_VISIBLE.join(', ');
const mmlErrors = useMemo(() => getModelMemoryLimitErrors(modelMemoryLimitValidationResult), [
modelMemoryLimitValidationResult,
]);
const isJobTypeWithDepVar =
jobType === JOB_TYPES.REGRESSION || jobType === JOB_TYPES.CLASSIFICATION;
@ -154,6 +158,9 @@ export const CreateAnalyticsForm: FC<CreateAnalyticsFormProps> = ({ actions, sta
const resp: DfAnalyticsExplainResponse = await ml.dataFrameAnalytics.explainDataFrameAnalytics(
jobConfig
);
const expectedMemoryWithoutDisk = resp.memory_estimation?.expected_memory_without_disk;
setEstimatedModelMemoryLimit(expectedMemoryWithoutDisk);
// If sourceIndex has changed load analysis field options again
if (previousSourceIndex !== sourceIndex || previousJobType !== jobType) {
@ -168,7 +175,7 @@ export const CreateAnalyticsForm: FC<CreateAnalyticsFormProps> = ({ actions, sta
}
setFormState({
modelMemoryLimit: resp.memory_estimation?.expected_memory_without_disk,
...(!modelMemoryLimit ? { modelMemoryLimit: expectedMemoryWithoutDisk } : {}),
excludesOptions: analyzedFieldsOptions,
loadingFieldOptions: false,
fieldOptionsFetchFail: false,
@ -176,7 +183,7 @@ export const CreateAnalyticsForm: FC<CreateAnalyticsFormProps> = ({ actions, sta
});
} else {
setFormState({
modelMemoryLimit: resp.memory_estimation?.expected_memory_without_disk,
...(!modelMemoryLimit ? { modelMemoryLimit: expectedMemoryWithoutDisk } : {}),
});
}
} catch (e) {
@ -189,14 +196,16 @@ export const CreateAnalyticsForm: FC<CreateAnalyticsFormProps> = ({ actions, sta
) {
errorMessage = e.message;
}
const fallbackModelMemoryLimit =
jobType !== undefined
? DEFAULT_MODEL_MEMORY_LIMIT[jobType]
: DEFAULT_MODEL_MEMORY_LIMIT.outlier_detection;
setEstimatedModelMemoryLimit(fallbackModelMemoryLimit);
setFormState({
fieldOptionsFetchFail: true,
maxDistinctValuesError: errorMessage,
loadingFieldOptions: false,
modelMemoryLimit:
jobType !== undefined
? DEFAULT_MODEL_MEMORY_LIMIT[jobType]
: DEFAULT_MODEL_MEMORY_LIMIT.outlier_detection,
modelMemoryLimit: fallbackModelMemoryLimit,
});
}
}, 400);
@ -642,7 +651,8 @@ export const CreateAnalyticsForm: FC<CreateAnalyticsFormProps> = ({ actions, sta
label={i18n.translate('xpack.ml.dataframe.analytics.create.modelMemoryLimitLabel', {
defaultMessage: 'Model memory limit',
})}
helpText={!modelMemoryLimitUnitValid && mmlUnitInvalidErrorMessage}
isInvalid={modelMemoryLimitValidationResult !== null}
error={mmlErrors}
>
<EuiFieldText
placeholder={
@ -653,7 +663,7 @@ export const CreateAnalyticsForm: FC<CreateAnalyticsFormProps> = ({ actions, sta
disabled={isJobCreated}
value={modelMemoryLimit || ''}
onChange={e => setFormState({ modelMemoryLimit: e.target.value })}
isInvalid={modelMemoryLimit === ''}
isInvalid={modelMemoryLimitValidationResult !== null}
data-test-subj="mlAnalyticsCreateJobFlyoutModelMemoryInput"
/>
</EuiFormRow>

View file

@ -24,6 +24,7 @@ export enum ACTION {
SET_JOB_CONFIG,
SET_JOB_IDS,
SWITCH_TO_ADVANCED_EDITOR,
SET_ESTIMATED_MODEL_MEMORY_LIMIT,
}
export type Action =
@ -59,7 +60,8 @@ export type Action =
}
| { type: ACTION.SET_IS_MODAL_VISIBLE; isModalVisible: State['isModalVisible'] }
| { type: ACTION.SET_JOB_CONFIG; payload: State['jobConfig'] }
| { type: ACTION.SET_JOB_IDS; jobIds: State['jobIds'] };
| { type: ACTION.SET_JOB_IDS; jobIds: State['jobIds'] }
| { type: ACTION.SET_ESTIMATED_MODEL_MEMORY_LIMIT; value: State['estimatedModelMemoryLimit'] };
// Actions wrapping the dispatcher exposed by the custom hook
export interface ActionDispatchers {
@ -73,4 +75,5 @@ export interface ActionDispatchers {
setJobConfig: (payload: State['jobConfig']) => void;
startAnalyticsJob: () => void;
switchToAdvancedEditor: () => void;
setEstimatedModelMemoryLimit: (value: State['estimatedModelMemoryLimit']) => void;
}

View file

@ -9,7 +9,7 @@ import { merge } from 'lodash';
import { DataFrameAnalyticsConfig } from '../../../../common';
import { ACTION } from './actions';
import { reducer, validateAdvancedEditor } from './reducer';
import { reducer, validateAdvancedEditor, validateMinMML } from './reducer';
import { getInitialState, JOB_TYPES } from './state';
type SourceIndex = DataFrameAnalyticsConfig['source']['index'];
@ -41,13 +41,19 @@ describe('useCreateAnalyticsForm', () => {
const initialState = getInitialState();
expect(initialState.isValid).toBe(false);
const updatedState = reducer(initialState, {
const stateWithEstimatedMml = reducer(initialState, {
type: ACTION.SET_ESTIMATED_MODEL_MEMORY_LIMIT,
value: '182222kb',
});
const updatedState = reducer(stateWithEstimatedMml, {
type: ACTION.SET_FORM_STATE,
payload: {
destinationIndex: 'the-destination-index',
jobId: 'the-analytics-job-id',
sourceIndex: 'the-source-index',
jobType: JOB_TYPES.OUTLIER_DETECTION,
modelMemoryLimit: '200mb',
},
});
expect(updatedState.isValid).toBe(true);
@ -146,3 +152,23 @@ describe('useCreateAnalyticsForm', () => {
).toBe(false);
});
});
describe('validateMinMML', () => {
test('should detect a lower value', () => {
expect(validateMinMML('10mb')('100kb')).toEqual({
min: { minValue: '10mb', actualValue: '100kb' },
});
});
test('should allow a bigger value', () => {
expect(validateMinMML('10mb')('1GB')).toEqual(null);
});
test('should allow the same value', () => {
expect(validateMinMML('1024mb')('1gb')).toEqual(null);
});
test('should ignore empty parameters', () => {
expect(validateMinMML((undefined as unknown) as string)('')).toEqual(null);
});
});

View file

@ -5,6 +5,9 @@
*/
import { i18n } from '@kbn/i18n';
import { memoize } from 'lodash';
// @ts-ignore
import numeral from '@elastic/numeral';
import { isValidIndexName } from '../../../../../../../common/util/es_utils';
import { Action, ACTION } from './actions';
@ -13,7 +16,12 @@ import {
isJobIdValid,
validateModelMemoryLimitUnits,
} from '../../../../../../../common/util/job_utils';
import { maxLengthValidator } from '../../../../../../../common/util/validators';
import {
composeValidators,
maxLengthValidator,
memoryInputValidator,
requiredValidator,
} from '../../../../../../../common/util/validators';
import {
JOB_ID_MAX_LENGTH,
ALLOWED_DATA_UNITS,
@ -37,6 +45,38 @@ export const mmlUnitInvalidErrorMessage = i18n.translate(
}
);
/**
* Returns the list of model memory limit errors based on validation result.
* @param mmlValidationResult
*/
export function getModelMemoryLimitErrors(mmlValidationResult: any): string[] | null {
if (mmlValidationResult === null) {
return null;
}
return Object.keys(mmlValidationResult).reduce((acc, errorKey) => {
if (errorKey === 'min') {
acc.push(
i18n.translate('xpack.ml.dataframe.analytics.create.modelMemoryUnitsMinError', {
defaultMessage: 'Model memory limit cannot be lower than {mml}',
values: {
mml: mmlValidationResult.min.minValue,
},
})
);
}
if (errorKey === 'invalidUnits') {
acc.push(
i18n.translate('xpack.ml.dataframe.analytics.create.modelMemoryUnitsInvalidError', {
defaultMessage: 'Model memory limit data unit unrecognized. It must be {str}',
values: { str: mmlAllowedUnitsStr },
})
);
}
return acc;
}, [] as string[]);
}
const getSourceIndexString = (state: State) => {
const { jobConfig } = state;
@ -222,6 +262,39 @@ export const validateAdvancedEditor = (state: State): State => {
return state;
};
/**
* Validates provided MML isn't lower than the estimated one.
*/
export function validateMinMML(estimatedMml: string) {
return (mml: string) => {
if (!mml || !estimatedMml) {
return null;
}
// @ts-ignore
const mmlInBytes = numeral(mml.toUpperCase()).value();
// @ts-ignore
const estimatedMmlInBytes = numeral(estimatedMml.toUpperCase()).value();
return estimatedMmlInBytes > mmlInBytes
? { min: { minValue: estimatedMml, actualValue: mml } }
: null;
};
}
/**
* Result validator function for the MML.
* Re-init only if the estimated mml has been changed.
*/
const mmlValidator = memoize((estimatedMml: string) =>
composeValidators(requiredValidator(), validateMinMML(estimatedMml), memoryInputValidator())
);
const validateMml = memoize(
(estimatedMml: string, mml: string | undefined) => mmlValidator(estimatedMml)(mml),
(...args: any) => args.join('_')
);
const validateForm = (state: State): State => {
const {
jobIdEmpty,
@ -238,22 +311,21 @@ const validateForm = (state: State): State => {
maxDistinctValuesError,
modelMemoryLimit,
} = state.form;
const { estimatedModelMemoryLimit } = state;
const jobTypeEmpty = jobType === undefined;
const dependentVariableEmpty =
(jobType === JOB_TYPES.REGRESSION || jobType === JOB_TYPES.CLASSIFICATION) &&
dependentVariable === '';
const modelMemoryLimitEmpty = modelMemoryLimit === '';
if (!modelMemoryLimitEmpty && modelMemoryLimit !== undefined) {
const { valid } = validateModelMemoryLimitUnits(modelMemoryLimit);
state.form.modelMemoryLimitUnitValid = valid;
}
const mmlValidationResult = validateMml(estimatedModelMemoryLimit, modelMemoryLimit);
state.form.modelMemoryLimitValidationResult = mmlValidationResult;
state.isValid =
maxDistinctValuesError === undefined &&
!jobTypeEmpty &&
state.form.modelMemoryLimitUnitValid &&
!mmlValidationResult &&
!jobIdEmpty &&
jobIdValid &&
!jobIdExists &&
@ -262,7 +334,6 @@ const validateForm = (state: State): State => {
!destinationIndexNameEmpty &&
destinationIndexNameValid &&
!dependentVariableEmpty &&
!modelMemoryLimitEmpty &&
(!destinationIndexPatternTitleExists || !createIndexPattern);
return state;
@ -373,6 +444,12 @@ export function reducer(state: State, action: Action): State {
isAdvancedEditorEnabled: true,
jobConfig,
});
case ACTION.SET_ESTIMATED_MODEL_MEMORY_LIMIT:
return {
...state,
estimatedModelMemoryLimit: action.value,
};
}
return state;

View file

@ -67,6 +67,7 @@ export interface State {
maxDistinctValuesError: string | undefined;
modelMemoryLimit: string | undefined;
modelMemoryLimitUnitValid: boolean;
modelMemoryLimitValidationResult: any;
previousJobType: null | AnalyticsJobType;
previousSourceIndex: EsIndexName | undefined;
sourceIndex: EsIndexName;
@ -88,6 +89,7 @@ export interface State {
jobConfig: DeepPartial<DataFrameAnalyticsConfig>;
jobIds: DataFrameAnalyticsId[];
requestMessages: FormMessage[];
estimatedModelMemoryLimit: string;
}
export const getInitialState = (): State => ({
@ -118,6 +120,7 @@ export const getInitialState = (): State => ({
maxDistinctValuesError: undefined,
modelMemoryLimit: undefined,
modelMemoryLimitUnitValid: true,
modelMemoryLimitValidationResult: null,
previousJobType: null,
previousSourceIndex: undefined,
sourceIndex: '',
@ -142,6 +145,7 @@ export const getInitialState = (): State => ({
isValid: false,
jobIds: [],
requestMessages: [],
estimatedModelMemoryLimit: '',
});
export const getJobConfigFromFormState = (

View file

@ -297,6 +297,10 @@ export const useCreateAnalyticsForm = (): CreateAnalyticsFormProps => {
dispatch({ type: ACTION.SWITCH_TO_ADVANCED_EDITOR });
};
const setEstimatedModelMemoryLimit = (value: State['estimatedModelMemoryLimit']) => {
dispatch({ type: ACTION.SET_ESTIMATED_MODEL_MEMORY_LIMIT, value });
};
const actions: ActionDispatchers = {
closeModal,
createAnalyticsJob,
@ -308,6 +312,7 @@ export const useCreateAnalyticsForm = (): CreateAnalyticsFormProps => {
setJobConfig,
startAnalyticsJob,
switchToAdvancedEditor,
setEstimatedModelMemoryLimit,
};
return { state, actions };