[ML] Data Frame Analytics creation wizard: add validation step (Part 1) (#93478)

* wip: create validationStep component

* wip: trainingPercent check, analysisFields check. Step details

* move validation check to server

* handle no training percent in validation

* move callout component to shared dir

* use shared Callout component in AD val and update message headings

* update types

* adds functional tests for validation

* adds api integration test for validate endpoint

* consolidate messages for depvar and fields

* fix accessibility test

* update license

* update validation messages

* update types in validation model

* add jobValidationReturnType
This commit is contained in:
Melissa Alvarez 2021-03-05 21:48:39 -05:00 committed by GitHub
parent 020a8ee7dd
commit cac26b8cda
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
28 changed files with 947 additions and 101 deletions

View file

@ -5,6 +5,16 @@
* 2.0.
*/
/**
* A single validation message. The shared Callout component accepts an
* object of this shape as its props.
*/
export interface CalloutMessage {
// Identifier of the message; used together with the list index to build React keys.
id: string;
// Title shown on the callout.
heading: string;
// Severity of the message.
status: VALIDATION_STATUS;
// Body text of the message.
text: string;
// Optional address for a "Learn more" link rendered after the text.
url?: string;
}
export type ValidateAnalyticsJobResponse = CalloutMessage[];
export enum VALIDATION_STATUS {
ERROR = 'error',
INFO = 'info',
@ -17,3 +27,10 @@ export const SKIP_BUCKET_SPAN_ESTIMATION = true;
export const ALLOWED_DATA_UNITS = ['B', 'KB', 'MB', 'GB', 'TB', 'PB'];
export const JOB_ID_MAX_LENGTH = 64;
// Data Frame Analytics
export const TRAINING_DOCS_UPPER = 200000;
export const TRAINING_DOCS_LOWER = 200;
export const INCLUDED_FIELDS_THRESHOLD = 100;
export const MINIMUM_NUM_FIELD_FOR_CHECK = 25;
export const FRACTION_EMPTY_LIMIT = 0.3;

View file

@ -28,7 +28,7 @@ export interface OutlierAnalysis {
interface Regression {
dependent_variable: string;
training_percent?: number;
training_percent: number;
num_top_feature_importance_values?: number;
prediction_field_name?: string;
}
@ -36,7 +36,7 @@ interface Regression {
interface Classification {
class_assignment_objective?: string;
dependent_variable: string;
training_percent?: number;
training_percent: number;
num_top_classes?: number;
num_top_feature_importance_values?: number;
prediction_field_name?: string;

View file

@ -0,0 +1,67 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import React, { FC } from 'react';
import { EuiCallOut, EuiLink, EuiSpacer } from '@elastic/eui';
import { FormattedMessage } from '@kbn/i18n/react';
import { CalloutMessage, VALIDATION_STATUS } from '../../../../common/constants/validation';
export const defaultIconType = 'questionInCircle';
/**
 * Translates a validation status into the colour name handed to EuiCallOut.
 * INFO becomes 'primary' and ERROR becomes 'danger'; every other status is
 * returned unchanged.
 */
const statusToEuiColor = (status: VALIDATION_STATUS) => {
  if (status === VALIDATION_STATUS.INFO) {
    return 'primary';
  }
  if (status === VALIDATION_STATUS.ERROR) {
    return 'danger';
  }
  return status;
};
/**
 * Translates a validation status into an icon type name.
 * Statuses without a dedicated icon are returned unchanged.
 */
export const statusToEuiIconType = (status: VALIDATION_STATUS) => {
  if (status === VALIDATION_STATUS.INFO) {
    return 'iInCircle';
  }
  if (status === VALIDATION_STATUS.ERROR) {
    return 'cross';
  }
  if (status === VALIDATION_STATUS.SUCCESS) {
    return 'check';
  }
  if (status === VALIDATION_STATUS.WARNING) {
    return 'alert';
  }
  return status;
};
/**
 * "Learn more" link that opens the given address in a new browsing context.
 *
 * The target uses the canonical lowercase `_blank` keyword. Browsers match the
 * keyword case-insensitively, but string comparisons in component libraries
 * usually do not.
 * NOTE(review): EuiLink is understood to key its new-tab handling off the
 * exact lowercase value — confirm against the EUI version in use.
 */
const Link: FC<{ url: string }> = ({ url }) => (
  <EuiLink href={url} target="_blank">
    <FormattedMessage id="xpack.ml.validateJob.learnMoreLinkText" defaultMessage="Learn more" />
  </EuiLink>
);
/**
 * Message body: the text followed, when a url is present, by a "Learn more" link.
 * Kept as its own component so it can also be used as the callout title.
 */
const Message: FC<Pick<CalloutMessage, 'text' | 'url'>> = ({ text, url }) => {
  const learnMore = url && <Link url={url} />;
  return (
    <>
      {text} {learnMore}
    </>
  );
};
/**
 * Renders one validation message as a small EuiCallOut followed by a spacer.
 *
 * With a heading, the heading is the title and the message is the body.
 * Without one, the message itself becomes the title and the body stays empty.
 * A missing status falls back to the default icon.
 */
export const Callout: FC<CalloutMessage> = ({ heading, status, text, url }) => {
  const message = <Message text={text} url={url} />;
  const iconType = status ? statusToEuiIconType(status) : defaultIconType;

  return (
    <>
      <EuiCallOut
        data-test-subj={'mlValidationCallout'}
        // @ts-ignore
        color={statusToEuiColor(status)}
        size="s"
        title={heading || message}
        iconType={iconType}
      >
        {heading && message}
      </EuiCallOut>
      <EuiSpacer size="m" />
    </>
  );
};

View file

@ -0,0 +1,8 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
export { Callout, statusToEuiIconType } from './callout';

View file

@ -12,14 +12,12 @@ import React, { Component, Fragment } from 'react';
import {
EuiButton,
EuiCallOut,
EuiLink,
EuiModal,
EuiModalBody,
EuiModalFooter,
EuiModalHeader,
EuiModalHeaderTitle,
EuiSpacer,
EuiText,
EuiFlexGroup,
EuiFlexItem,
@ -31,6 +29,7 @@ import { FormattedMessage } from '@kbn/i18n/react';
import { getDocLinks } from '../../util/dependency_cache';
import { VALIDATION_STATUS } from '../../../../common/constants/validation';
import { Callout, statusToEuiIconType } from '../callout';
import { getMostSevereMessageStatus } from '../../../../common/util/validation_utils';
import { toastNotificationServiceProvider } from '../../services/toast_notification_service';
import { withKibana } from '../../../../../../../src/plugins/kibana_react/public';
@ -49,77 +48,20 @@ const getDefaultState = () => ({
title: '',
});
const statusToEuiColor = (status) => {
switch (status) {
case VALIDATION_STATUS.INFO:
return 'primary';
break;
case VALIDATION_STATUS.ERROR:
return 'danger';
break;
default:
return status;
}
};
const statusToEuiIconType = (status) => {
switch (status) {
case VALIDATION_STATUS.INFO:
return 'iInCircle';
break;
case VALIDATION_STATUS.ERROR:
return 'cross';
break;
case VALIDATION_STATUS.SUCCESS:
return 'check';
break;
case VALIDATION_STATUS.WARNING:
return 'alert';
break;
default:
return status;
}
};
const Link = ({ url }) => (
<EuiLink href={url} target="_BLANK">
<FormattedMessage id="xpack.ml.validateJob.learnMoreLinkText" defaultMessage="Learn more" />
</EuiLink>
);
Link.propTypes = {
url: PropTypes.string.isRequired,
};
// Message is its own component so it can be passed
// as the "title" prop in the Callout component.
const Message = ({ message }) => (
<React.Fragment>
{message.text} {message.url && <Link url={message.url} />}
</React.Fragment>
);
Message.propTypes = {
message: PropTypes.shape({
text: PropTypes.string,
url: PropTypes.string,
}),
};
const MessageList = ({ messages, idFilterList }) => {
const callouts = messages
.filter((m) => idFilterList.includes(m.id) === false)
.map((m, i) => <Callout key={`${m.id}_${i}`} message={m} />);
.map((m, i) => <Callout key={`${m.id}_${i}`} {...m} />);
// there could be no error or success messages due to the
// idFilterList being applied. so rather than showing nothing,
// show a message saying all passed
const allPassedCallout = (
<Callout
message={{
text: i18n.translate('xpack.ml.validateJob.allPassed', {
defaultMessage: 'All validation checks passed successfully',
}),
status: VALIDATION_STATUS.SUCCESS,
}}
text={i18n.translate('xpack.ml.validateJob.allPassed', {
defaultMessage: 'All validation checks passed successfully',
})}
status={VALIDATION_STATUS.SUCCESS}
/>
);
@ -130,27 +72,6 @@ MessageList.propTypes = {
idFilterList: PropTypes.array,
};
const Callout = ({ message }) => (
<React.Fragment>
<EuiCallOut
color={statusToEuiColor(message.status)}
size="s"
title={message.heading || <Message message={message} />}
iconType={statusToEuiIconType(message.status)}
>
{message.heading && <Message message={message} />}
</EuiCallOut>
<EuiSpacer size="m" />
</React.Fragment>
);
Callout.propTypes = {
message: PropTypes.shape({
status: PropTypes.string,
text: PropTypes.string,
url: PropTypes.string,
}),
};
const LoadingSpinner = () => (
<EuiFlexGroup justifyContent="spaceAround" alignItems="center">
<EuiFlexItem grow={false}>

View file

@ -182,7 +182,8 @@ export const getTrainingPercent = (
analysis: AnalysisConfig
):
| RegressionAnalysis['regression']['training_percent']
| ClassificationAnalysis['classification']['training_percent'] => {
| ClassificationAnalysis['classification']['training_percent']
| undefined => {
let trainingPercent;
if (isRegressionAnalysis(analysis)) {

View file

@ -31,10 +31,10 @@ export interface ListItems {
description: string | JSX.Element;
}
export const AdvancedStepDetails: FC<{ setCurrentStep: any; state: State }> = ({
setCurrentStep,
state,
}) => {
export const AdvancedStepDetails: FC<{
setCurrentStep: React.Dispatch<React.SetStateAction<ANALYTICS_STEPS>>;
state: State;
}> = ({ setCurrentStep, state }) => {
const { form, isJobCreated } = state;
const {
computeFeatureInfluence,

View file

@ -25,7 +25,7 @@ import { ANALYTICS_STEPS } from '../../page';
const MAX_INCLUDES_LENGTH = 5;
interface Props {
setCurrentStep: React.Dispatch<React.SetStateAction<any>>;
setCurrentStep: React.Dispatch<React.SetStateAction<ANALYTICS_STEPS>>;
state: State;
}

View file

@ -22,10 +22,10 @@ export interface ListItems {
description: string | JSX.Element;
}
export const DetailsStepDetails: FC<{ setCurrentStep: any; state: State }> = ({
setCurrentStep,
state,
}) => {
export const DetailsStepDetails: FC<{
setCurrentStep: React.Dispatch<React.SetStateAction<ANALYTICS_STEPS>>;
state: State;
}> = ({ setCurrentStep, state }) => {
const { form, isJobCreated } = state;
const { description, jobId, destinationIndex, resultsField } = form;

View file

@ -382,7 +382,7 @@ export const DetailsStepForm: FC<CreateAnalyticsStepProps> = ({
<ContinueButton
isDisabled={isStepInvalid}
onClick={() => {
setCurrentStep(ANALYTICS_STEPS.CREATE);
setCurrentStep(ANALYTICS_STEPS.VALIDATION);
}}
/>
</Fragment>

View file

@ -9,3 +9,4 @@ export { ConfigurationStep } from './configuration_step/index';
export { AdvancedStep } from './advanced_step/index';
export { DetailsStep } from './details_step/index';
export { CreateStep } from './create_step/index';
export { ValidationStepWrapper } from './validation_step/index';

View file

@ -0,0 +1,8 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
export { ValidationStepWrapper } from './validation_step_wrapper';

View file

@ -0,0 +1,106 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import React, { FC, useEffect, useState } from 'react';
import { EuiLoadingSpinner, EuiSpacer } from '@elastic/eui';
import { i18n } from '@kbn/i18n';
import { useMlApiContext } from '../../../../../contexts/kibana';
import { extractErrorMessage } from '../../../../../../../common/util/errors';
import { CreateAnalyticsStepProps } from '../../../analytics_management/hooks/use_create_analytics_form';
import { getJobConfigFromFormState } from '../../../analytics_management/hooks/use_create_analytics_form/state';
import {
CalloutMessage,
ValidateAnalyticsJobResponse,
VALIDATION_STATUS,
} from '../../../../../../../common/constants/validation';
import { DataFrameAnalyticsConfig } from '../../../../../../../common/types/data_frame_analytics';
import { Callout } from '../../../../../components/callout';
import { ANALYTICS_STEPS } from '../../page';
import { ContinueButton } from '../continue_button';
import { ValidationSummary } from './validation_step_wrapper';
/**
* Props of the validation step: the common wizard step props plus a setter
* used to report the warning/success counts.
*/
interface Props extends CreateAnalyticsStepProps {
// Called with the number of warning and success messages once validation results arrive.
setValidationSummary: React.Dispatch<React.SetStateAction<ValidationSummary>>;
}
/**
 * Wizard step that validates the job configuration once, when it mounts, and
 * renders every returned message as a callout followed by a continue button.
 *
 * The configuration comes from the advanced editor's job config when the
 * advanced editor is enabled, otherwise it is derived from the form state.
 * The number of warning and success messages is reported through
 * `setValidationSummary`. A failed request is shown as one extra error callout.
 */
export const ValidationStep: FC<Props> = ({ state, setCurrentStep, setValidationSummary }) => {
  const [checksInProgress, setChecksInProgress] = useState<boolean>(false);
  const [validationMessages, setValidationMessages] = useState<CalloutMessage[]>([]);
  const [errorMessage, setErrorMessage] = useState<CalloutMessage | undefined>();

  const { form, jobConfig, isAdvancedEditorEnabled } = state;
  const {
    dataFrameAnalytics: { validateDataFrameAnalytics },
  } = useMlApiContext();

  const runValidationChecks = async () => {
    try {
      const analyticsJobConfig = (isAdvancedEditorEnabled
        ? jobConfig
        : getJobConfigFromFormState(form)) as DataFrameAnalyticsConfig;
      const validationResults: ValidateAnalyticsJobResponse = await validateDataFrameAnalytics(
        analyticsJobConfig
      );

      // Only warnings and successes are counted for the step summary.
      const validationSummary = { warning: 0, success: 0 };
      validationResults.forEach((message) => {
        if (message?.status === VALIDATION_STATUS.WARNING) {
          validationSummary.warning++;
        } else if (message?.status === VALIDATION_STATUS.SUCCESS) {
          validationSummary.success++;
        }
      });

      setValidationMessages(validationResults);
      setValidationSummary(validationSummary);
    } catch (err) {
      setErrorMessage({
        heading: i18n.translate(
          'xpack.ml.dataframe.analytics.validation.validationFetchErrorMessage',
          {
            defaultMessage: 'Error validating job',
          }
        ),
        id: 'error',
        status: VALIDATION_STATUS.ERROR,
        text: extractErrorMessage(err),
      });
    } finally {
      // The spinner is hidden whether the request succeeded or failed.
      setChecksInProgress(false);
    }
  };

  // The empty dependency list makes the checks run once, when the step mounts.
  useEffect(function beginValidationChecks() {
    setChecksInProgress(true);
    runValidationChecks();
  }, []);

  // Build a new array instead of pushing into the state array: mutating state
  // during render would append another copy of the error on every re-render.
  const messagesToShow =
    errorMessage !== undefined ? [...validationMessages, errorMessage] : validationMessages;

  const callouts = messagesToShow.map((m, i) => <Callout key={`${m.id}_${i}`} {...m} />);

  return (
    <>
      {checksInProgress && <EuiLoadingSpinner size="xl" />}
      {!checksInProgress && (
        <>
          {callouts}
          <EuiSpacer />
          <ContinueButton
            isDisabled={false}
            onClick={() => {
              setCurrentStep(ANALYTICS_STEPS.CREATE);
            }}
          />
        </>
      )}
    </>
  );
};

View file

@ -0,0 +1,99 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import React, { FC } from 'react';
import { i18n } from '@kbn/i18n';
import {
EuiButtonEmpty,
EuiDescriptionList,
EuiFlexGroup,
EuiFlexItem,
EuiIcon,
EuiSpacer,
EuiText,
} from '@elastic/eui';
import { State } from '../../../analytics_management/hooks/use_create_analytics_form/state';
import { ANALYTICS_STEPS } from '../../page';
import { ValidationSummary } from './validation_step_wrapper';
/**
 * Collapsed summary of the validation step: the number of successful checks
 * and the number of warnings, each next to an icon, plus a "View" button that
 * returns to the validation step. The button is hidden once the job exists.
 *
 * NOTE(review): two i18n ids below contain "validatioinDetails" (sic). They
 * are reproduced unchanged here; confirm whether the spelling should be fixed.
 */
export const ValidationStepDetails: FC<{
  setCurrentStep: React.Dispatch<React.SetStateAction<ANALYTICS_STEPS>>;
  state: State;
  validationSummary: ValidationSummary;
}> = ({ setCurrentStep, state, validationSummary }) => {
  const { success: successCount, warning: warningCount } = validationSummary;
  const canViewStep = !state.isJobCreated;

  const successfulChecks = {
    title: i18n.translate(
      'xpack.ml.dataframe.analytics.create.validatioinDetails.successfulChecks',
      {
        defaultMessage: 'Successful checks',
      }
    ),
    description: (
      <>
        <EuiFlexGroup gutterSize="xs" alignItems="center">
          <EuiFlexItem grow={false}>
            <EuiText size="s">{successCount}</EuiText>
          </EuiFlexItem>
          <EuiFlexItem grow={false}>
            <EuiIcon type="check" />
          </EuiFlexItem>
        </EuiFlexGroup>
      </>
    ),
  };

  const warnings = {
    title: i18n.translate('xpack.ml.dataframe.analytics.create.validatioinDetails.warnings', {
      defaultMessage: 'Warnings',
    }),
    description: (
      <>
        <EuiFlexGroup gutterSize="xs">
          <EuiFlexItem grow={false}>
            <EuiText size="s">{warningCount}</EuiText>
          </EuiFlexItem>
          <EuiFlexItem grow={false}>
            <EuiIcon type="alert" />
          </EuiFlexItem>
        </EuiFlexGroup>
      </>
    ),
  };

  const goToValidationStep = () => {
    setCurrentStep(ANALYTICS_STEPS.VALIDATION);
  };

  return (
    <>
      <EuiFlexGroup style={{ width: '70%' }}>
        <EuiFlexItem grow={false}>
          <EuiDescriptionList compressed listItems={[successfulChecks]} />
        </EuiFlexItem>
        <EuiFlexItem grow={false}>
          <EuiDescriptionList
            style={{ wordBreak: 'break-word' }}
            compressed
            listItems={[warnings]}
          />
        </EuiFlexItem>
      </EuiFlexGroup>
      <EuiSpacer />
      {canViewStep && (
        <EuiButtonEmpty size="s" onClick={goToValidationStep}>
          {i18n.translate('xpack.ml.dataframe.analytics.create.validationDetails.viewButtonText', {
            defaultMessage: 'View',
          })}
        </EuiButtonEmpty>
      )}
    </>
  );
};

View file

@ -0,0 +1,59 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import React, { FC, useState } from 'react';
import { EuiForm } from '@elastic/eui';
import { CreateAnalyticsStepProps } from '../../../analytics_management/hooks/use_create_analytics_form';
import { ValidationStep } from './validation_step';
import { ValidationStepDetails } from './validation_step_details';
import { ANALYTICS_STEPS } from '../../page';
/**
* Counts of validation messages by status, shown in the step summary.
*/
export interface ValidationSummary {
// Number of messages with the warning status.
warning: number;
// Number of messages with the success status.
success: number;
}
/**
 * Container for the validation step of the creation wizard.
 *
 * While validation is the current step it renders the step itself. Once the
 * step has been activated and another step is current, it renders the summary
 * instead. The warning/success counts live here so they survive the switch
 * between the two views.
 */
export const ValidationStepWrapper: FC<CreateAnalyticsStepProps> = ({
  actions,
  state,
  setCurrentStep,
  step,
  stepActivated,
}) => {
  const [validationSummary, setValidationSummary] = useState<ValidationSummary>({
    warning: 0,
    success: 0,
  });

  const isCurrentStep = step === ANALYTICS_STEPS.VALIDATION;
  const isSummaryVisible = !isCurrentStep && stepActivated === true;

  // The test subject carries space-separated markers for the visible view.
  let dataTestSubj = 'mlAnalyticsCreateJobWizardValidationStepWrapper';
  if (isCurrentStep) {
    dataTestSubj += ' active';
  }
  if (isSummaryVisible) {
    dataTestSubj += ' summary';
  }

  return (
    <EuiForm className="mlDataFrameAnalyticsCreateForm" data-test-subj={dataTestSubj}>
      {isCurrentStep && (
        <ValidationStep
          actions={actions}
          state={state}
          setCurrentStep={setCurrentStep}
          setValidationSummary={setValidationSummary}
        />
      )}
      {isSummaryVisible && (
        <ValidationStepDetails
          setCurrentStep={setCurrentStep}
          state={state}
          validationSummary={validationSummary}
        />
      )}
    </EuiForm>
  );
};

View file

@ -25,13 +25,20 @@ import { useMlContext } from '../../../contexts/ml';
import { ml } from '../../../services/ml_api_service';
import { useCreateAnalyticsForm } from '../analytics_management/hooks/use_create_analytics_form';
import { CreateAnalyticsAdvancedEditor } from './components/create_analytics_advanced_editor';
import { AdvancedStep, ConfigurationStep, CreateStep, DetailsStep } from './components';
import {
AdvancedStep,
ConfigurationStep,
CreateStep,
DetailsStep,
ValidationStepWrapper,
} from './components';
import { DataFrameAnalyticsId } from '../../../../../common/types/data_frame_analytics';
/**
* Steps of the creation wizard. The members take implicit numeric values in
* declaration order; the page compares steps with `>=` and uses a step as an
* index into its list of activated steps, so the order here matters.
*/
export enum ANALYTICS_STEPS {
CONFIGURATION,
ADVANCED,
DETAILS,
VALIDATION,
CREATE,
}
@ -41,7 +48,13 @@ interface Props {
export const Page: FC<Props> = ({ jobId }) => {
const [currentStep, setCurrentStep] = useState<ANALYTICS_STEPS>(ANALYTICS_STEPS.CONFIGURATION);
const [activatedSteps, setActivatedSteps] = useState<boolean[]>([true, false, false, false]);
const [activatedSteps, setActivatedSteps] = useState<boolean[]>([
true,
false,
false,
false,
false,
]);
const mlContext = useMlContext();
const { currentIndexPattern } = mlContext;
@ -127,6 +140,21 @@ export const Page: FC<Props> = ({ jobId }) => {
),
status: currentStep >= ANALYTICS_STEPS.DETAILS ? undefined : ('incomplete' as EuiStepStatus),
},
{
title: i18n.translate('xpack.ml.dataframe.analytics.creation.validationStepTitle', {
defaultMessage: 'Validation',
}),
children: (
<ValidationStepWrapper
{...createAnalyticsForm}
setCurrentStep={setCurrentStep}
step={currentStep}
stepActivated={activatedSteps[ANALYTICS_STEPS.VALIDATION]}
/>
),
status:
currentStep >= ANALYTICS_STEPS.VALIDATION ? undefined : ('incomplete' as EuiStepStatus),
},
{
title: i18n.translate('xpack.ml.dataframe.analytics.creation.createStepTitle', {
defaultMessage: 'Create',

View file

@ -42,7 +42,7 @@ export interface CreateAnalyticsFormProps {
}
export interface CreateAnalyticsStepProps extends CreateAnalyticsFormProps {
setCurrentStep: React.Dispatch<React.SetStateAction<any>>;
setCurrentStep: React.Dispatch<React.SetStateAction<ANALYTICS_STEPS>>;
step?: ANALYTICS_STEPS;
stepActivated?: boolean;
}

View file

@ -9,6 +9,7 @@ import { http } from '../http_service';
import { basePath } from './index';
import { DataFrameAnalyticsStats } from '../../data_frame_analytics/pages/analytics_management/components/analytics_list/common';
import { ValidateAnalyticsJobResponse } from '../../../../common/constants/validation';
import {
DataFrameAnalyticsConfig,
UpdateDataFrameAnalyticsConfig,
@ -166,4 +167,12 @@ export const dataFrameAnalytics = {
method: 'GET',
});
},
/**
* Sends the given analytics job configuration to the validate endpoint.
*
* @param analyticsConfig job configuration; serialised to JSON as the request body
* @returns the result of the http call, typed as the validation response
*/
validateDataFrameAnalytics(analyticsConfig: DeepPartial<DataFrameAnalyticsConfig>) {
const body = JSON.stringify(analyticsConfig);
return http<ValidateAnalyticsJobResponse>({
path: `${basePath()}/data_frame/analytics/validate`,
method: 'POST',
body,
});
},
};

View file

@ -0,0 +1,295 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { i18n } from '@kbn/i18n';
import { IScopedClusterClient } from 'kibana/server';
import { getAnalysisType } from '../../../common/util/analytics_utils';
import {
INCLUDED_FIELDS_THRESHOLD,
MINIMUM_NUM_FIELD_FOR_CHECK,
FRACTION_EMPTY_LIMIT,
TRAINING_DOCS_LOWER,
TRAINING_DOCS_UPPER,
VALIDATION_STATUS,
} from '../../../common/constants/validation';
import { getDependentVar } from '../../../common/util/analytics_utils';
import { extractErrorMessage } from '../../../common/util/errors';
import { SearchResponse7 } from '../../../common';
import { DataFrameAnalyticsConfig } from '../../../common/types/data_frame_analytics';
// Result shape of the `missing` aggregations requested per analyzed field,
// keyed by aggregation name.
interface MissingAgg {
[key: string]: {
doc_count: number;
};
}
// Result shape of the `cardinality` aggregation requested for the dependent
// variable, keyed by aggregation name.
interface CardinalityAgg {
[key: string]: {
value: number;
};
}
// Search response whose aggregations are one of the two shapes above.
type ValidationSearchResult = Omit<SearchResponse7, 'aggregations'> & {
aggregations: MissingAgg | CardinalityAgg;
};
// Query used for the validation search when the job supplies none.
const defaultQuery = { match_all: {} };
// Translated headings shared by the messages of each check.
const trainingPercentHeading = i18n.translate(
'xpack.ml.models.dfaValidation.messages.trainingPercentHeading',
{
defaultMessage: 'Training percent',
}
);
const analysisFieldsHeading = i18n.translate(
'xpack.ml.models.dfaValidation.messages.analysisFieldsHeading',
{
defaultMessage: 'Analysis fields',
}
);
const dependentVarHeading = i18n.translate(
'xpack.ml.models.dfaValidation.messages.dependentVarHeading',
{
defaultMessage: 'Dependent variable',
}
);
// Base warning message for the dependent variable check. `text` is empty here;
// the validation logic supplies it before the message is returned.
const dependentVarWarningMessage = {
id: 'dep_var_check',
text: '',
status: VALIDATION_STATUS.WARNING,
heading: dependentVarHeading,
};
// Base warning message for the analysis fields check. `text` is empty here;
// the validation logic supplies it before the message is returned.
const analysisFieldsWarningMessage = {
id: 'analysis_fields',
text: '',
status: VALIDATION_STATUS.WARNING,
heading: analysisFieldsHeading,
};
/**
 * Builds the training-percent message for a given number of training documents.
 *
 * @param trainingDocs number of documents that would be used for training
 * @returns a warning when the count is at or above the upper bound or at or
 *          below the lower bound, otherwise a success message
 */
function getTrainingPercentMessage(trainingDocs: number) {
  const tooMany = trainingDocs >= TRAINING_DOCS_UPPER;
  if (tooMany) {
    return {
      id: 'training_percent_high',
      text: i18n.translate('xpack.ml.models.dfaValidation.messages.highTrainingPercentWarning', {
        defaultMessage:
          'A high number of training docs may result in long running jobs. Try reducing the training percent.',
      }),
      status: VALIDATION_STATUS.WARNING,
      heading: trainingPercentHeading,
    };
  }

  const tooFew = trainingDocs <= TRAINING_DOCS_LOWER;
  if (tooFew) {
    return {
      id: 'training_percent_low',
      text: i18n.translate('xpack.ml.models.dfaValidation.messages.lowTrainingPercentWarning', {
        defaultMessage:
          'A low number of training docs may result in inaccurate models. Try increasing the training percent or using a larger data set.',
      }),
      status: VALIDATION_STATUS.WARNING,
      heading: trainingPercentHeading,
    };
  }

  return {
    id: 'training_percent',
    text: i18n.translate('xpack.ml.models.dfaValidation.messages.trainingPercentSuccess', {
      defaultMessage: 'The training percent is high enough to model patterns in the data.',
    }),
    status: VALIDATION_STATUS.SUCCESS,
    heading: trainingPercentHeading,
  };
}
/**
 * Runs one aggregation-only search against the source index and converts the
 * result into validation messages.
 *
 * Checks performed:
 * - training percent (only when `trainingPercent` is truthy): message chosen
 *   from the number of documents that would be used for training;
 * - dependent variable (only when `depVar` is non-empty): warns when the field
 *   has a single distinct value or too many missing values;
 * - analysis fields: warns when checked fields have too many missing values
 *   and/or the number of fields is high.
 *
 * Messages are returned in that order. A failing search is not thrown; it is
 * reported as one error message appended to the list.
 *
 * @param asCurrentUser Elasticsearch client used for the search
 * @param analyzedFields fields included in the analysis
 * @param index source index or indices
 * @param query query restricting the documents; defaults to match_all
 * @param depVar dependent variable field name, or '' when there is none
 * @param trainingPercent training percent of the job, if any
 * @returns the list of validation messages
 */
async function getValidationCheckMessages(
  asCurrentUser: IScopedClusterClient['asCurrentUser'],
  analyzedFields: string[],
  index: string | string[],
  query: any = defaultQuery,
  depVar: string,
  trainingPercent?: number
) {
  const messages = [];
  const emptyFields: string[] = [];
  const percentEmptyLimit = FRACTION_EMPTY_LIMIT * 100;

  // Work on per-call copies: the module-level objects are shared by every
  // request, so writing `text` onto them would leak between calls.
  const depVarWarning = { ...dependentVarWarningMessage };
  const analysisFieldsWarning = { ...analysisFieldsWarningMessage };

  const hasDepVar = depVar !== '';
  const depVarConstAggName = `${depVar}_const`;
  let depVarChecked = false;
  let depVarEmpty = false;
  let depVarConstant = false;
  let analysisFieldsNumHigh = false;

  // One `missing` aggregation per field, for at most the first
  // MINIMUM_NUM_FIELD_FOR_CHECK fields.
  const aggs = analyzedFields.slice(0, MINIMUM_NUM_FIELD_FOR_CHECK).reduce((acc, curr) => {
    acc[curr] = { missing: { field: curr } };
    return acc;
  }, {} as any);

  if (hasDepVar) {
    // A cardinality of 1 means the dependent variable is constant.
    aggs[depVarConstAggName] = { cardinality: { field: depVar } };
  }

  try {
    const { body } = await asCurrentUser.search<ValidationSearchResult>({
      index,
      size: 0,
      track_total_hits: true,
      body: {
        query,
        aggs,
      },
    });

    const totalDocs = body.hits.total.value;

    if (trainingPercent) {
      const trainingDocs = totalDocs * (trainingPercent / 100);
      messages.push(getTrainingPercentMessage(trainingDocs));

      // NOTE(review): the field-count check only runs when a training percent
      // is supplied, so it is skipped for jobs without one — confirm intended.
      if (analyzedFields.length > INCLUDED_FIELDS_THRESHOLD) {
        analysisFieldsNumHigh = true;
      }
    }

    if (body.aggregations) {
      // Only collect flags here and decide afterwards, so the outcome does not
      // depend on the order in which the aggregations are iterated.
      for (const [aggName, { doc_count: docCount, value }] of Object.entries(
        body.aggregations
      )) {
        if (docCount > 0 && docCount / totalDocs > FRACTION_EMPTY_LIMIT) {
          emptyFields.push(aggName);
          if (hasDepVar && aggName === depVar) {
            depVarEmpty = true;
          }
        }

        if (hasDepVar && aggName === depVarConstAggName) {
          depVarChecked = true;
          depVarConstant = value === 1;
        }
      }
    }

    if (depVarChecked) {
      if (depVarConstant) {
        // The constant-value warning takes precedence over the empty-values one.
        depVarWarning.text = i18n.translate(
          'xpack.ml.models.dfaValidation.messages.depVarContsWarning',
          {
            defaultMessage:
              'The dependent variable is a constant value. It may be unsuitable for analysis.',
          }
        );
        messages.push(depVarWarning);
      } else if (depVarEmpty) {
        depVarWarning.text = i18n.translate(
          'xpack.ml.models.dfaValidation.messages.depVarEmptyWarning',
          {
            defaultMessage:
              'The dependent variable has at least {percentEmpty}% empty values. It may be unsuitable for analysis.',
            values: { percentEmpty: percentEmptyLimit },
          }
        );
        messages.push(depVarWarning);
      } else {
        messages.push({
          id: 'dep_var_check',
          text: i18n.translate('xpack.ml.models.dfaValidation.messages.depVarSuccess', {
            defaultMessage: 'The dependent variable field contains useful values for analysis.',
          }),
          status: VALIDATION_STATUS.SUCCESS,
          heading: dependentVarHeading,
        });
      }
    }

    const analysisFieldsEmpty = emptyFields.length > 0;

    if (analysisFieldsEmpty && analysisFieldsNumHigh) {
      analysisFieldsWarning.text = i18n.translate(
        'xpack.ml.models.dfaValidation.messages.analysisFieldsWarningText',
        {
          defaultMessage:
            'Some fields included for analysis have at least {percentEmpty}% empty values. The number of selected fields is high and may result in increased resource usage and long-running jobs.',
          values: { percentEmpty: percentEmptyLimit },
        }
      );
      messages.push(analysisFieldsWarning);
    } else if (analysisFieldsEmpty) {
      analysisFieldsWarning.text = i18n.translate(
        'xpack.ml.models.dfaValidation.messages.analysisFieldsEmptyWarningText',
        {
          defaultMessage:
            'Some fields included for analysis have at least {percentEmpty}% empty values and may not be suitable for analysis.',
          values: { percentEmpty: percentEmptyLimit },
        }
      );
      messages.push(analysisFieldsWarning);
    } else if (analysisFieldsNumHigh) {
      analysisFieldsWarning.text = i18n.translate(
        'xpack.ml.models.dfaValidation.messages.analysisFieldsHighWarningText',
        {
          defaultMessage:
            'The number of selected fields is high and may result in increased resource usage and long-running jobs.',
        }
      );
      messages.push(analysisFieldsWarning);
    } else {
      messages.push({
        id: 'analysis_fields',
        text: i18n.translate('xpack.ml.models.dfaValidation.messages.analysisFieldsSuccessText', {
          defaultMessage:
            'The selected analysis fields are sufficiently populated and contain useful data for analysis.',
        }),
        status: VALIDATION_STATUS.SUCCESS,
        heading: analysisFieldsHeading,
      });
    }
  } catch (e) {
    // Report the failure as a message instead of failing the whole request.
    const error = extractErrorMessage(e);
    messages.push({
      id: 'validation_error',
      text: i18n.translate('xpack.ml.models.dfaValidation.messages.validationErrorText', {
        defaultMessage: 'An error occurred attempting to validate job. {error}',
        values: { error },
      }),
      status: VALIDATION_STATUS.ERROR,
      heading: i18n.translate('xpack.ml.models.dfaValidation.messages.validationErrorHeading', {
        defaultMessage: 'Unable to validate job.',
      }),
    });
  }

  return messages;
}
/**
 * Validates a data frame analytics job configuration.
 *
 * Extracts the analysis settings, dependent variable, included fields and
 * source index/query from the job and delegates to the validation checks,
 * which run as the current user.
 *
 * @param client scoped cluster client of the request
 * @param job job configuration to validate
 * @returns the validation messages produced by the checks
 */
export async function validateAnalyticsJob(
  client: IScopedClusterClient,
  job: DataFrameAnalyticsConfig
) {
  const { analysis: analysisConfig, analyzed_fields: analyzedFields, source } = job;
  const analysisType = getAnalysisType(analysisConfig);
  const analysis = analysisConfig[analysisType];
  const depVar = getDependentVar(analysisConfig);

  return getValidationCheckMessages(
    client.asCurrentUser,
    analyzedFields.includes,
    source.index,
    source.query,
    depVar,
    // @ts-ignore
    analysis.training_percent
  );
}

View file

@ -18,6 +18,7 @@
"DeleteDataFrameAnalytics",
"JobsExist",
"GetDataFrameAnalyticsIdMap",
"ValidateDataFrameAnalytics",
"DataVisualizer",
"GetOverallStats",

View file

@ -25,6 +25,7 @@ import {
import { GetAnalyticsMapArgs, ExtendAnalyticsMapArgs } from '../models/data_frame_analytics/types';
import { IndexPatternHandler } from '../models/data_frame_analytics/index_patterns';
import { AnalyticsManager } from '../models/data_frame_analytics/analytics_manager';
import { validateAnalyticsJob } from '../models/data_frame_analytics/validation';
import { DeleteDataFrameAnalyticsWithIndexStatus } from '../../common/types/data_frame_analytics';
import { getAuthorizationHeader } from '../lib/request_authorization';
import { DataFrameAnalyticsConfig } from '../../common/types/data_frame_analytics';
@ -674,4 +675,36 @@ export function dataFrameAnalyticsRoutes({ router, mlLicense, routeGuard }: Rout
}
})
);
/**
* @apiGroup DataFrameAnalytics
*
* @api {post} /api/ml/data_frame/analytics/validate Validate the data frame analytics job config
* @apiName ValidateDataFrameAnalytics
* @apiDescription Validates the data frame analytics job config.
*
* @apiSchema (body) dataAnalyticsJobConfigSchema
*/
router.post(
{
path: '/api/ml/data_frame/analytics/validate',
validate: {
body: dataAnalyticsJobConfigSchema,
},
options: {
tags: ['access:ml:canCreateDataFrameAnalytics'],
},
},
routeGuard.fullLicenseAPIGuard(async ({ client, request, response }) => {
const jobConfig = request.body;
try {
const results = await validateAnalyticsJob(client, jobConfig);
return response.ok({
body: results,
});
} catch (e) {
return response.customError(wrapError(e));
}
})
);
}

View file

@ -276,6 +276,11 @@ export default function ({ getService }: FtrProviderContext) {
await a11y.testAppSnapshot();
});
it('data frame analytics create job validation step for outlier job', async () => {
await ml.dataFrameAnalyticsCreation.continueToValidationStep();
await a11y.testAppSnapshot();
});
it('data frame analytics create job create step for outlier job', async () => {
await ml.dataFrameAnalyticsCreation.continueToCreateStep();
await a11y.testAppSnapshot();

View file

@ -0,0 +1,135 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import expect from '@kbn/expect';
import { FtrProviderContext } from '../../../ftr_provider_context';
import { USER } from '../../../../functional/services/ml/security_common';
import { DataFrameAnalyticsConfig } from '../../../../../plugins/ml/public/application/data_frame_analytics/common';
import { DeepPartial } from '../../../../../plugins/ml/common/types/common';
import { COMMON_REQUEST_HEADERS } from '../../../../functional/services/ml/common_api';
/**
 * API integration tests for `POST /api/ml/data_frame/analytics/validate`.
 *
 * One job config is built per analysis type (classification, regression,
 * outlier detection) against the `ft_bank_marketing` index. The endpoint is
 * then exercised with a power user (expects 200 and a list of validation
 * messages) and with an unauthorized user and a view-only user (expects 403).
 */
export default ({ getService }: FtrProviderContext) => {
  const esArchiver = getService('esArchiver');
  const supertest = getService('supertestWithoutAuth');
  const ml = getService('ml');

  // Timestamped prefix so job ids differ between test runs.
  const jobId = `bm_${Date.now()}`;
  const generateDestinationIndex = (analyticsId: string) => `user-${analyticsId}`;

  // Settings shared by every job config sent to the endpoint.
  const commonJobConfig = {
    source: {
      index: ['ft_bank_marketing'],
      query: {
        match_all: {},
      },
    },
    analyzed_fields: {
      includes: [],
      excludes: [],
    },
    model_memory_limit: '60mb',
    allow_lazy_start: false, // default value
    max_num_threads: 1, // default value
  };

  // NOTE: `jobTypes` and the description list below are parallel arrays; keep their order in sync.
  const jobTypes = ['classification', 'regression', 'outlier_detection'];
  const jobAnalyses: any = {
    classification: {
      dependent_variable: 'y',
      training_percent: 20,
    },
    regression: {
      dependent_variable: 'y',
      training_percent: 20,
    },
    outlier_detection: {
      compute_feature_influence: true,
      standardization_enabled: true,
    },
  };

  const testJobConfigs: Array<{
    jobId: string;
    jobType: string;
    config: DeepPartial<DataFrameAnalyticsConfig>;
  }> = ['Test classification job', 'Test regression job', 'Test outlier detection job'].map(
    (description, idx) => {
      const analyticsId = `${jobId}_${idx}`;
      const jobType = jobTypes[idx];
      return {
        jobId: analyticsId,
        jobType,
        config: {
          description,
          dest: {
            index: generateDestinationIndex(analyticsId),
            results_field: 'ml',
          },
          analysis: { [jobType]: jobAnalyses[jobType] },
          ...commonJobConfig,
        },
      };
    }
  );

  describe('POST data_frame/analytics/validate', () => {
    before(async () => {
      await esArchiver.loadIfNeeded('ml/bm_classification');
      await ml.testResources.setKibanaTimeZoneToUTC();
    });

    after(async () => {
      await ml.api.cleanMlIndices();
    });

    describe('ValidateDataFrameAnalytics', () => {
      testJobConfigs.forEach((testConfig) => {
        it(`should validate ${testConfig.jobType} job for given config`, async () => {
          const requestBody = testConfig.config;

          const { body } = await supertest
            .post('/api/ml/data_frame/analytics/validate')
            .auth(USER.ML_POWERUSER, ml.securityCommon.getPasswordForUser(USER.ML_POWERUSER))
            .set(COMMON_REQUEST_HEADERS)
            .send(requestBody)
            .expect(200);

          expect(body).not.to.be(undefined);
          // Outlier detection is expected to yield a single message, the other job types three.
          expect(body.length).to.eql(testConfig.jobType === 'outlier_detection' ? 1 : 3);
          // Compare sorted key lists: the order in which the server happens to
          // build the message object is not part of the response contract, so
          // the assertion must not depend on it.
          expect(Object.keys(body[0]).sort()).to.eql(['heading', 'id', 'status', 'text']);
        });
      });

      it('should not allow analytics job validation for unauthorized user', async () => {
        const requestBody = testJobConfigs[0].config;

        const { body } = await supertest
          .post('/api/ml/data_frame/analytics/validate')
          .auth(USER.ML_UNAUTHORIZED, ml.securityCommon.getPasswordForUser(USER.ML_UNAUTHORIZED))
          .set(COMMON_REQUEST_HEADERS)
          .send(requestBody)
          .expect(403);

        expect(body.error).to.eql('Forbidden');
        expect(body.message).to.eql('Forbidden');
      });

      it('should not allow analytics job validation for the user with only view permission', async () => {
        const requestBody = testJobConfigs[0].config;

        const { body } = await supertest
          .post('/api/ml/data_frame/analytics/validate')
          .auth(USER.ML_VIEWER, ml.securityCommon.getPasswordForUser(USER.ML_VIEWER))
          .set(COMMON_REQUEST_HEADERS)
          .send(requestBody)
          .expect(403);

        expect(body.error).to.eql('Forbidden');
        expect(body.message).to.eql('Forbidden');
      });
    });
  });
};

View file

@ -152,6 +152,13 @@ export default function ({ getService }: FtrProviderContext) {
testData.createIndexPattern
);
await ml.testExecution.logTestStep('continues to the validation step');
await ml.dataFrameAnalyticsCreation.continueToValidationStep();
await ml.testExecution.logTestStep('checks validation callouts exist');
await ml.dataFrameAnalyticsCreation.assertValidationCalloutsExists();
await ml.dataFrameAnalyticsCreation.assertAllValidationCalloutsPresent(3);
await ml.testExecution.logTestStep('continues to the create step');
await ml.dataFrameAnalyticsCreation.continueToCreateStep();
});

View file

@ -198,6 +198,16 @@ export default function ({ getService }: FtrProviderContext) {
await ml.dataFrameAnalyticsCreation.setJobId(cloneJobId);
await ml.dataFrameAnalyticsCreation.setDestIndex(cloneDestIndex);
await ml.testExecution.logTestStep('should continue to the validation step');
await ml.dataFrameAnalyticsCreation.continueToValidationStep();
await ml.testExecution.logTestStep('Should have validation callouts');
await ml.dataFrameAnalyticsCreation.assertValidationCalloutsExists();
await ml.dataFrameAnalyticsCreation.assertAllValidationCalloutsPresent(
testData?.job?.analysis?.outlier_detection !== undefined ? 1 : 3
);
await ml.testExecution.logTestStep('should continue to the create step');
await ml.dataFrameAnalyticsCreation.continueToCreateStep();
});

View file

@ -168,6 +168,13 @@ export default function ({ getService }: FtrProviderContext) {
testData.createIndexPattern
);
await ml.testExecution.logTestStep('continues to the validation step');
await ml.dataFrameAnalyticsCreation.continueToValidationStep();
await ml.testExecution.logTestStep('checks validation callouts exist');
await ml.dataFrameAnalyticsCreation.assertValidationCalloutsExists();
await ml.dataFrameAnalyticsCreation.assertAllValidationCalloutsPresent(1);
await ml.testExecution.logTestStep('continues to the create step');
await ml.dataFrameAnalyticsCreation.continueToCreateStep();
});

View file

@ -141,6 +141,13 @@ export default function ({ getService }: FtrProviderContext) {
testData.createIndexPattern
);
await ml.testExecution.logTestStep('continues to the validation step');
await ml.dataFrameAnalyticsCreation.continueToValidationStep();
await ml.testExecution.logTestStep('checks validation callouts exist');
await ml.dataFrameAnalyticsCreation.assertValidationCalloutsExists();
await ml.dataFrameAnalyticsCreation.assertAllValidationCalloutsPresent(3);
await ml.testExecution.logTestStep('continues to the create step');
await ml.dataFrameAnalyticsCreation.continueToCreateStep();
});

View file

@ -297,6 +297,10 @@ export function MachineLearningDataFrameAnalyticsCreationProvider(
await testSubjects.existOrFail('mlAnalyticsCreateJobWizardCreateStep active');
},
/** Fails unless the wizard's validation step wrapper is present and marked active. */
async assertValidationStepActive() {
  const activeValidationStepSubj = 'mlAnalyticsCreateJobWizardValidationStepWrapper active';
  await testSubjects.existOrFail(activeValidationStepSubj);
},
async continueToAdditionalOptionsStep() {
await retry.tryForTime(5000, async () => {
await testSubjects.clickWhenNotDisabled('mlAnalyticsCreateJobWizardContinueButton');
@ -311,6 +315,24 @@ export function MachineLearningDataFrameAnalyticsCreationProvider(
});
},
/**
 * Clicks the wizard's continue button and checks that the validation step
 * became active, retrying the click-and-check pair for up to 5 seconds.
 */
async continueToValidationStep() {
  const clickContinueAndVerify = async () => {
    await testSubjects.clickWhenNotDisabled('mlAnalyticsCreateJobWizardContinueButton');
    await this.assertValidationStepActive();
  };

  await retry.tryForTime(5000, clickContinueAndVerify);
},
/** Waits up to 4 seconds for at least one validation callout to be present. */
async assertValidationCalloutsExists() {
  const calloutSubj = 'mlValidationCallout';
  await retry.tryForTime(4000, () => testSubjects.existOrFail(calloutSubj));
},
/**
 * Asserts that exactly `expectedNumCallouts` validation callouts are rendered.
 *
 * The lookup and count check are retried for up to 4 seconds, in line with
 * the sibling helpers, so a count read while callouts are still being
 * rendered does not fail the test on the first attempt.
 *
 * @param expectedNumCallouts number of `mlValidationCallout` elements expected
 */
async assertAllValidationCalloutsPresent(expectedNumCallouts: number) {
  await retry.tryForTime(4000, async () => {
    const validationCallouts = await testSubjects.findAll('mlValidationCallout');
    expect(validationCallouts.length).to.eql(
      expectedNumCallouts,
      `Expected number of validation callouts to be '${expectedNumCallouts}' (got '${validationCallouts.length}')`
    );
  });
},
async continueToCreateStep() {
await retry.tryForTime(5000, async () => {
await testSubjects.clickWhenNotDisabled('mlAnalyticsCreateJobWizardContinueButton');