mirror of
https://github.com/elastic/kibana.git
synced 2025-04-23 09:19:04 -04:00
[ML] Alerting rule for Anomaly Detection jobs monitoring (#106084)
* [ML] init job health alerting rule type * [ML] add health checks selection ui * [ML] define schema * [ML] support all jobs selection * [ML] jobs health service * [ML] add logger * [ML] add context message * [ML] fix default message for i18n * [ML] check response size * [ML] add exclude jobs control * [ML] getResultJobsHealthRuleConfig * [ML] change naming for shared services * [ML] fix excluded jobs filtering * [ML] check for execution results * [ML] update context fields * [ML] unit tests for getResultJobsHealthRuleConfig * [ML] refactor and job ids check * [ML] rename datafeed * [ML] fix translation messages * [ML] hide non-implemented tests * [ML] remove jod ids join from the getJobs call * [ML] add validation for the tests config * [ML] fix excluded jobs udpate * [ML] update jobIdsDescription message * [ML] allow selection all jobs only for include * [ML] better ux for excluded jobs setup * [ML] change rule type name * [ML] fix typo * [ML] change instances names * [ML] fix messages * [ML] hide error callout, show health checks error in EuiFormRow * [ML] add check for job state * [ML] add alertingRules key to the doc links * [ML] update types * [ML] remove redundant type * [ML] fix job and datafeed states check * [ML] fix job and datafeed states check, add comments * [ML] add unit tests
This commit is contained in:
parent
ba2915e54f
commit
10ef0e9e3e
20 changed files with 1135 additions and 86 deletions
|
@ -242,6 +242,7 @@ export class DocLinksService {
|
|||
anomalyDetectionJobResource: `${ELASTICSEARCH_DOCS}ml-put-job.html#ml-put-job-path-parms`,
|
||||
anomalyDetectionJobResourceAnalysisConfig: `${ELASTICSEARCH_DOCS}ml-put-job.html#put-analysisconfig`,
|
||||
anomalyDetectionJobTips: `${ELASTIC_WEBSITE_URL}guide/en/machine-learning/${DOC_LINK_VERSION}/ml-ad-finding-anomalies.html#ml-ad-job-tips`,
|
||||
alertingRules: `${ELASTIC_WEBSITE_URL}guide/en/machine-learning/${DOC_LINK_VERSION}/ml-configuring-alerts.html`,
|
||||
anomalyDetectionModelMemoryLimits: `${ELASTIC_WEBSITE_URL}guide/en/machine-learning/${DOC_LINK_VERSION}/ml-ad-finding-anomalies.html#ml-ad-model-memory-limits`,
|
||||
calendars: `${ELASTIC_WEBSITE_URL}guide/en/machine-learning/${DOC_LINK_VERSION}/ml-ad-finding-anomalies.html#ml-ad-calendars`,
|
||||
classificationEvaluation: `${ELASTIC_WEBSITE_URL}guide/en/machine-learning/${DOC_LINK_VERSION}/ml-dfa-classification.html#ml-dfanalytics-classification-evaluation`,
|
||||
|
|
|
@ -6,46 +6,22 @@
|
|||
*/
|
||||
|
||||
import { i18n } from '@kbn/i18n';
|
||||
import { ActionGroup } from '../../../alerting/common';
|
||||
import { MINIMUM_FULL_LICENSE } from '../license';
|
||||
import { PLUGIN_ID } from './app';
|
||||
|
||||
export const ML_ALERT_TYPES = {
|
||||
ANOMALY_DETECTION: 'xpack.ml.anomaly_detection_alert',
|
||||
AD_JOBS_HEALTH: 'xpack.ml.anomaly_detection_jobs_health',
|
||||
} as const;
|
||||
|
||||
export type MlAlertType = typeof ML_ALERT_TYPES[keyof typeof ML_ALERT_TYPES];
|
||||
|
||||
export const ANOMALY_SCORE_MATCH_GROUP_ID = 'anomaly_score_match';
|
||||
export type AnomalyScoreMatchGroupId = typeof ANOMALY_SCORE_MATCH_GROUP_ID;
|
||||
export const THRESHOLD_MET_GROUP: ActionGroup<AnomalyScoreMatchGroupId> = {
|
||||
id: ANOMALY_SCORE_MATCH_GROUP_ID,
|
||||
name: i18n.translate('xpack.ml.anomalyDetectionAlert.actionGroupName', {
|
||||
defaultMessage: 'Anomaly score matched the condition',
|
||||
}),
|
||||
};
|
||||
|
||||
export const ML_ALERT_TYPES_CONFIG: Record<
|
||||
MlAlertType,
|
||||
{
|
||||
name: string;
|
||||
actionGroups: Array<ActionGroup<AnomalyScoreMatchGroupId>>;
|
||||
defaultActionGroupId: AnomalyScoreMatchGroupId;
|
||||
minimumLicenseRequired: string;
|
||||
producer: string;
|
||||
}
|
||||
> = {
|
||||
[ML_ALERT_TYPES.ANOMALY_DETECTION]: {
|
||||
name: i18n.translate('xpack.ml.anomalyDetectionAlert.name', {
|
||||
defaultMessage: 'Anomaly detection alert',
|
||||
}),
|
||||
actionGroups: [THRESHOLD_MET_GROUP],
|
||||
defaultActionGroupId: ANOMALY_SCORE_MATCH_GROUP_ID,
|
||||
minimumLicenseRequired: MINIMUM_FULL_LICENSE,
|
||||
producer: PLUGIN_ID,
|
||||
},
|
||||
};
|
||||
|
||||
export const ALERT_PREVIEW_SAMPLE_SIZE = 5;
|
||||
|
||||
export const TOP_N_BUCKETS_COUNT = 1;
|
||||
|
||||
export const ALL_JOBS_SELECTION = '*';
|
||||
|
||||
export const HEALTH_CHECK_NAMES = {
|
||||
datafeed: i18n.translate('xpack.ml.alertTypes.jobsHealthAlertingRule.datafeedCheckName', {
|
||||
defaultMessage: 'Datafeed is not started',
|
||||
}),
|
||||
};
|
||||
|
|
|
@ -108,3 +108,38 @@ export type MlAnomalyDetectionAlertRule = Omit<Alert<MlAnomalyDetectionAlertPara
|
|||
export interface JobAlertingRuleStats {
|
||||
alerting_rules?: MlAnomalyDetectionAlertRule[];
|
||||
}
|
||||
|
||||
/** Settings shared by every health check of the jobs health rule. */
interface CommonHealthCheckConfig {
|
||||
// Whether the check is switched on.
enabled: boolean;
|
||||
}
|
||||
|
||||
/**
 * Params of the anomaly detection jobs health rule: the job selection to
 * include, an optional selection to exclude, and optional per-check settings.
 */
export type MlAnomalyDetectionJobsHealthRuleParams = {
|
||||
// Job IDs and/or group IDs selected for the rule.
includeJobs: {
|
||||
jobIds?: string[];
|
||||
groupIds?: string[];
|
||||
};
|
||||
// Job IDs and/or group IDs to leave out of the selection; may be omitted or `null`.
excludeJobs?: {
|
||||
jobIds?: string[];
|
||||
groupIds?: string[];
|
||||
} | null;
|
||||
// Settings per health check; the whole object and every individual entry may be omitted or `null`.
testsConfig?: {
|
||||
datafeed?: CommonHealthCheckConfig | null;
|
||||
mml?: CommonHealthCheckConfig | null;
|
||||
// Carries two extra optional settings on top of the common `enabled` flag.
// NOTE(review): units/format of `docsCount` and `timeInterval` are not visible here; confirm against the executor.
delayedData?:
|
||||
| (CommonHealthCheckConfig & {
|
||||
docsCount?: number | null;
|
||||
timeInterval?: string | null;
|
||||
})
|
||||
| null;
|
||||
// Carries one extra optional setting on top of the common `enabled` flag.
behindRealtime?:
|
||||
| (CommonHealthCheckConfig & {
|
||||
timeInterval?: string | null;
|
||||
})
|
||||
| null;
|
||||
errorMessages?: CommonHealthCheckConfig | null;
|
||||
} | null;
|
||||
} & AlertTypeParams;
|
||||
|
||||
/** Type of the `testsConfig` rule param, including its `null` / `undefined` variants. */
export type JobsHealthRuleTestsConfig = MlAnomalyDetectionJobsHealthRuleParams['testsConfig'];
|
||||
|
||||
/** Union of the health check keys available on a non-empty tests config. */
export type JobsHealthTests = keyof Exclude<JobsHealthRuleTestsConfig, null | undefined>;
|
||||
|
|
|
@ -5,7 +5,11 @@
|
|||
* 2.0.
|
||||
*/
|
||||
|
||||
import { getLookbackInterval, resolveLookbackInterval } from './alerts';
|
||||
import {
|
||||
getLookbackInterval,
|
||||
getResultJobsHealthRuleConfig,
|
||||
resolveLookbackInterval,
|
||||
} from './alerts';
|
||||
import type { CombinedJobWithStats, Datafeed, Job } from '../types/anomaly_detection_jobs';
|
||||
|
||||
describe('resolveLookbackInterval', () => {
|
||||
|
@ -76,3 +80,49 @@ describe('getLookbackInterval', () => {
|
|||
expect(getLookbackInterval(testJobs)).toBe('32m');
|
||||
});
|
||||
});
|
||||
|
||||
describe('getResultJobsHealthRuleConfig', () => {
  // Config expected when none of the health checks is explicitly switched off.
  const allChecksEnabled = {
    datafeed: { enabled: true },
    mml: { enabled: true },
    delayedData: { enabled: true },
    behindRealtime: { enabled: true },
    errorMessages: { enabled: true },
  };

  test('returns default config for empty configuration', () => {
    const resolvedConfig = getResultJobsHealthRuleConfig(null);

    expect(resolvedConfig).toEqual(allChecksEnabled);
  });

  test('returns config with overridden values based on provided configuration', () => {
    const resolvedConfig = getResultJobsHealthRuleConfig({
      mml: { enabled: false },
      errorMessages: { enabled: true },
    });

    // Only the check that was explicitly disabled differs from the defaults.
    expect(resolvedConfig).toEqual({
      ...allChecksEnabled,
      mml: { enabled: false },
    });
  });
});
|
||||
|
|
|
@ -9,6 +9,7 @@ import { CombinedJobWithStats, Datafeed, Job } from '../types/anomaly_detection_
|
|||
import { resolveMaxTimeInterval } from './job_utils';
|
||||
import { isDefined } from '../types/guards';
|
||||
import { parseInterval } from './parse_interval';
|
||||
import { JobsHealthRuleTestsConfig } from '../types/alerts';
|
||||
|
||||
const narrowBucketLength = 60;
|
||||
|
||||
|
@ -51,3 +52,27 @@ export function getTopNBuckets(job: Job): number {
|
|||
|
||||
return Math.ceil(narrowBucketLength / bucketSpan.asSeconds());
|
||||
}
|
||||
|
||||
/**
 * Builds the effective health checks configuration for the jobs health rule.
 *
 * Each check is reported with an `enabled` flag only. A check is considered
 * enabled unless the provided configuration explicitly sets it to `false`;
 * a missing or `null` config (or check entry) falls back to `true`.
 *
 * @param config - tests configuration from the rule params, may be `null` or `undefined`
 * @returns an object with an `{ enabled }` entry for every known health check
 */
export function getResultJobsHealthRuleConfig(config: JobsHealthRuleTestsConfig) {
  // Resolve a single check entry to its effective `enabled` flag.
  const withDefault = (check: { enabled?: boolean | null } | null | undefined) => {
    return { enabled: check?.enabled ?? true };
  };

  return {
    datafeed: withDefault(config?.datafeed),
    mml: withDefault(config?.mml),
    delayedData: withDefault(config?.delayedData),
    behindRealtime: withDefault(config?.behindRealtime),
    errorMessages: withDefault(config?.errorMessages),
  };
}
|
||||
|
|
|
@ -5,12 +5,13 @@
|
|||
* 2.0.
|
||||
*/
|
||||
|
||||
import React, { FC, useCallback, useEffect, useMemo, useState } from 'react';
|
||||
import React, { FC, ReactNode, useCallback, useEffect, useMemo, useState } from 'react';
|
||||
import { i18n } from '@kbn/i18n';
|
||||
import { FormattedMessage } from '@kbn/i18n/react';
|
||||
import { EuiComboBox, EuiComboBoxOptionOption, EuiComboBoxProps, EuiFormRow } from '@elastic/eui';
|
||||
import { JobId } from '../../common/types/anomaly_detection_jobs';
|
||||
import { MlApiServices } from '../application/services/ml_api_service';
|
||||
import { ALL_JOBS_SELECTION } from '../../common/constants/alerts';
|
||||
|
||||
interface JobSelection {
|
||||
jobIds?: JobId[];
|
||||
|
@ -25,6 +26,17 @@ export interface JobSelectorControlProps {
|
|||
* Validation is handled by alerting framework
|
||||
*/
|
||||
errors: string[];
|
||||
/** Enables multiple selection of jobs and groups */
|
||||
multiSelect?: boolean;
|
||||
label?: ReactNode;
|
||||
/**
|
||||
* Allows selecting all jobs, even those created afterward.
|
||||
*/
|
||||
allowSelectAll?: boolean;
|
||||
/**
|
||||
* Available options to select. By default suggest all existing jobs.
|
||||
*/
|
||||
options?: Array<EuiComboBoxOptionOption<string>>;
|
||||
}
|
||||
|
||||
export const JobSelectorControl: FC<JobSelectorControlProps> = ({
|
||||
|
@ -32,6 +44,10 @@ export const JobSelectorControl: FC<JobSelectorControlProps> = ({
|
|||
onChange,
|
||||
adJobsApiService,
|
||||
errors,
|
||||
multiSelect = false,
|
||||
label,
|
||||
allowSelectAll = false,
|
||||
options: defaultOptions,
|
||||
}) => {
|
||||
const [options, setOptions] = useState<Array<EuiComboBoxOptionOption<string>>>([]);
|
||||
const jobIds = useMemo(() => new Set(), []);
|
||||
|
@ -60,12 +76,39 @@ export const JobSelectorControl: FC<JobSelectorControlProps> = ({
|
|||
});
|
||||
|
||||
setOptions([
|
||||
...(allowSelectAll
|
||||
? [
|
||||
{
|
||||
label: i18n.translate('xpack.ml.jobSelector.selectAllGroupLabel', {
|
||||
defaultMessage: 'Select all',
|
||||
}),
|
||||
options: [
|
||||
{
|
||||
label: i18n.translate('xpack.ml.jobSelector.selectAllOptionLabel', {
|
||||
defaultMessage: '*',
|
||||
}),
|
||||
value: ALL_JOBS_SELECTION,
|
||||
},
|
||||
],
|
||||
},
|
||||
]
|
||||
: []),
|
||||
{
|
||||
label: i18n.translate('xpack.ml.jobSelector.jobOptionsLabel', {
|
||||
defaultMessage: 'Jobs',
|
||||
}),
|
||||
options: jobIdOptions.map((v) => ({ label: v })),
|
||||
},
|
||||
...(multiSelect
|
||||
? [
|
||||
{
|
||||
label: i18n.translate('xpack.ml.jobSelector.groupOptionsLabel', {
|
||||
defaultMessage: 'Groups',
|
||||
}),
|
||||
options: groupIdOptions.map((v) => ({ label: v })),
|
||||
},
|
||||
]
|
||||
: []),
|
||||
]);
|
||||
} catch (e) {
|
||||
// TODO add error handling
|
||||
|
@ -73,25 +116,33 @@ export const JobSelectorControl: FC<JobSelectorControlProps> = ({
|
|||
}, [adJobsApiService]);
|
||||
|
||||
const onSelectionChange: EuiComboBoxProps<string>['onChange'] = useCallback(
|
||||
(selectionUpdate) => {
|
||||
((selectionUpdate) => {
|
||||
if (selectionUpdate.some((selectedOption) => selectedOption.value === ALL_JOBS_SELECTION)) {
|
||||
onChange({ jobIds: [ALL_JOBS_SELECTION] });
|
||||
return;
|
||||
}
|
||||
|
||||
const selectedJobIds: JobId[] = [];
|
||||
const selectedGroupIds: string[] = [];
|
||||
selectionUpdate.forEach(({ label }: { label: string }) => {
|
||||
if (jobIds.has(label)) {
|
||||
selectedJobIds.push(label);
|
||||
} else if (groupIds.has(label)) {
|
||||
selectedGroupIds.push(label);
|
||||
selectionUpdate.forEach(({ label: selectedLabel }: { label: string }) => {
|
||||
if (jobIds.has(selectedLabel)) {
|
||||
selectedJobIds.push(selectedLabel);
|
||||
} else if (groupIds.has(selectedLabel)) {
|
||||
selectedGroupIds.push(selectedLabel);
|
||||
} else if (defaultOptions?.some((v) => v.options?.some((o) => o.label === selectedLabel))) {
|
||||
selectedJobIds.push(selectedLabel);
|
||||
}
|
||||
});
|
||||
onChange({
|
||||
...(selectedJobIds.length > 0 ? { jobIds: selectedJobIds } : {}),
|
||||
...(selectedGroupIds.length > 0 ? { groupIds: selectedGroupIds } : {}),
|
||||
});
|
||||
},
|
||||
[jobIds, groupIds]
|
||||
}) as Exclude<EuiComboBoxProps<string>['onChange'], undefined>,
|
||||
[jobIds, groupIds, defaultOptions]
|
||||
);
|
||||
|
||||
useEffect(() => {
|
||||
if (defaultOptions) return;
|
||||
fetchOptions();
|
||||
}, []);
|
||||
|
||||
|
@ -99,15 +150,20 @@ export const JobSelectorControl: FC<JobSelectorControlProps> = ({
|
|||
<EuiFormRow
|
||||
fullWidth
|
||||
label={
|
||||
<FormattedMessage id="xpack.ml.jobSelector.formControlLabel" defaultMessage="Select job" />
|
||||
label ?? (
|
||||
<FormattedMessage
|
||||
id="xpack.ml.jobSelector.formControlLabel"
|
||||
defaultMessage="Select job"
|
||||
/>
|
||||
)
|
||||
}
|
||||
isInvalid={!!errors?.length}
|
||||
error={errors}
|
||||
>
|
||||
<EuiComboBox<string>
|
||||
singleSelection
|
||||
singleSelection={!multiSelect}
|
||||
selectedOptions={selectedOptions}
|
||||
options={options}
|
||||
options={defaultOptions ?? options}
|
||||
onChange={onSelectionChange}
|
||||
fullWidth
|
||||
data-test-subj={'mlAnomalyAlertJobSelection'}
|
||||
|
|
|
@ -0,0 +1,148 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import React, { FC, useCallback, useMemo, useState } from 'react';
|
||||
import { EuiComboBoxOptionOption, EuiForm, EuiSpacer } from '@elastic/eui';
|
||||
import { i18n } from '@kbn/i18n';
|
||||
import { FormattedMessage } from '@kbn/i18n/react';
|
||||
import useDebounce from 'react-use/lib/useDebounce';
|
||||
import { AlertTypeParamsExpressionProps } from '../../../../triggers_actions_ui/public';
|
||||
import { MlAnomalyDetectionJobsHealthRuleParams } from '../../../common/types/alerts';
|
||||
import { JobSelectorControl } from '../job_selector';
|
||||
import { jobsApiProvider } from '../../application/services/ml_api_service/jobs';
|
||||
import { HttpService } from '../../application/services/http_service';
|
||||
import { useMlKibana } from '../../application/contexts/kibana';
|
||||
import { TestsSelectionControl } from './tests_selection_control';
|
||||
import { isPopulatedObject } from '../../../common';
|
||||
import { ALL_JOBS_SELECTION } from '../../../common/constants/alerts';
|
||||
|
||||
export type MlAnomalyAlertTriggerProps = AlertTypeParamsExpressionProps<MlAnomalyDetectionJobsHealthRuleParams>;
|
||||
|
||||
/**
 * Params form of the anomaly detection jobs health alerting rule.
 *
 * Renders a selector for the jobs/groups to include, a selector for the jobs to
 * exclude and the health checks selection. The options offered by the "exclude"
 * selector are re-fetched, debounced by 500ms, whenever the "include" selection
 * changes.
 */
const AnomalyDetectionJobsHealthRuleTrigger: FC<MlAnomalyAlertTriggerProps> = ({
  alertParams,
  setAlertParams,
  errors,
}) => {
  const {
    services: { http },
  } = useMlKibana();
  const mlHttpService = useMemo(() => new HttpService(http), [http]);
  const adJobsApiService = useMemo(() => jobsApiProvider(mlHttpService), [mlHttpService]);
  const [excludeJobsOptions, setExcludeJobsOptions] = useState<
    Array<EuiComboBoxOptionOption<string>>
  >([]);
  // Sequence number of the latest "exclude" options update. A response is applied
  // only if no newer update has started since the request was sent.
  const latestOptionsUpdateId = React.useRef(0);

  const includeJobsAndGroupIds: string[] = useMemo(
    () => (Object.values(alertParams.includeJobs ?? {}) as string[][]).flat(),
    [alertParams.includeJobs]
  );

  const excludeJobsAndGroupIds: string[] = useMemo(
    () => (Object.values(alertParams.excludeJobs ?? {}) as string[][]).flat(),
    [alertParams.excludeJobs]
  );

  // Creates an updater bound to a single rule param.
  const onAlertParamChange = useCallback(
    <T extends keyof MlAnomalyDetectionJobsHealthRuleParams>(param: T) => (
      update: MlAnomalyDetectionJobsHealthRuleParams[T]
    ) => {
      setAlertParams(param, update);
    },
    []
  );

  const onIncludeJobsChange = useCallback(onAlertParamChange('includeJobs'), []);

  const onExcludeJobsChange = useCallback(
    (update: NonNullable<MlAnomalyDetectionJobsHealthRuleParams['excludeJobs']>) => {
      const callback = onAlertParamChange('excludeJobs');
      // An empty selection is stored as `null` rather than as an empty object.
      if (isPopulatedObject(update)) {
        callback(update);
      } else {
        callback(null);
      }
    },
    []
  );

  const onTestsConfigChange = useCallback(onAlertParamChange('testsConfig'), []);

  const formErrors = Object.values(errors).flat();
  const isFormInvalid = formErrors.length > 0;

  useDebounce(
    function updateExcludeJobsOptions() {
      latestOptionsUpdateId.current += 1;
      const updateId = latestOptionsUpdateId.current;

      const areAllJobsSelected = alertParams.includeJobs?.jobIds?.[0] === ALL_JOBS_SELECTION;
      const includedGroupIds = alertParams.includeJobs?.groupIds ?? [];

      if (!areAllJobsSelected && includedGroupIds.length === 0) {
        // It only makes sense to suggest excluded jobs options when at least one group or all jobs are selected
        setExcludeJobsOptions([]);
        return;
      }

      adJobsApiService
        .jobs(areAllJobsSelected ? [] : includedGroupIds)
        .then((jobs) => {
          // Ignore a response that belongs to an outdated "include" selection.
          if (updateId !== latestOptionsUpdateId.current) return;

          setExcludeJobsOptions([
            {
              label: i18n.translate('xpack.ml.jobSelector.jobOptionsLabel', {
                defaultMessage: 'Jobs',
              }),
              options: jobs.map((v) => ({ label: v.job_id })),
            },
          ]);
        })
        .catch(() => {
          // A failed request must not surface as an unhandled rejection;
          // fall back to offering no suggestions for the current selection.
          if (updateId !== latestOptionsUpdateId.current) return;

          setExcludeJobsOptions([]);
        });
    },
    500,
    [alertParams.includeJobs]
  );

  return (
    <EuiForm
      data-test-subj={'mlJobsHealthAlertingRuleForm'}
      invalidCallout={'none'}
      error={formErrors}
      isInvalid={isFormInvalid}
    >
      <JobSelectorControl
        jobsAndGroupIds={includeJobsAndGroupIds}
        adJobsApiService={adJobsApiService}
        onChange={onIncludeJobsChange}
        errors={Array.isArray(errors.includeJobs) ? errors.includeJobs : []}
        multiSelect
        allowSelectAll
        label={
          <FormattedMessage
            id="xpack.ml.alertTypes.jobsHealthAlertingRule.includeJobs.label"
            defaultMessage="Include jobs or groups"
          />
        }
      />

      <EuiSpacer size="m" />

      <JobSelectorControl
        jobsAndGroupIds={excludeJobsAndGroupIds}
        adJobsApiService={adJobsApiService}
        onChange={onExcludeJobsChange}
        errors={Array.isArray(errors.excludeJobs) ? errors.excludeJobs : []}
        multiSelect
        label={
          <FormattedMessage
            id="xpack.ml.alertTypes.jobsHealthAlertingRule.excludeJobs.label"
            defaultMessage="Exclude jobs or groups"
          />
        }
        options={excludeJobsOptions}
      />

      <EuiSpacer size="m" />

      <TestsSelectionControl
        config={alertParams.testsConfig}
        onChange={onTestsConfigChange}
        errors={Array.isArray(errors.testsConfig) ? errors.testsConfig : []}
      />
    </EuiForm>
  );
};
|
||||
|
||||
// Default export is required for React.lazy loading
|
||||
|
||||
// eslint-disable-next-line import/no-default-export
|
||||
export default AnomalyDetectionJobsHealthRuleTrigger;
|
|
@ -0,0 +1,8 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
export { registerJobsHealthAlertingRule } from './register_jobs_health_alerting_rule';
|
|
@ -0,0 +1,69 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import { lazy } from 'react';
|
||||
import { i18n } from '@kbn/i18n';
|
||||
import { TriggersAndActionsUIPublicPluginSetup } from '../../../../triggers_actions_ui/public';
|
||||
import { PluginSetupContract as AlertingSetup } from '../../../../alerting/public';
|
||||
import { ML_ALERT_TYPES } from '../../../common/constants/alerts';
|
||||
import { MlAnomalyDetectionJobsHealthRuleParams } from '../../../common/types/alerts';
|
||||
|
||||
/**
 * Registers the anomaly detection jobs health rule type in the
 * triggers and actions UI registry.
 *
 * @param triggersActionsUi - setup contract providing the `alertTypeRegistry`
 * @param alerting - optional alerting setup contract; not used by this function
 */
export function registerJobsHealthAlertingRule(
  triggersActionsUi: TriggersAndActionsUIPublicPluginSetup,
  alerting?: AlertingSetup
) {
  triggersActionsUi.alertTypeRegistry.register({
    id: ML_ALERT_TYPES.AD_JOBS_HEALTH,
    description: i18n.translate('xpack.ml.alertTypes.jobsHealthAlertingRule.description', {
      defaultMessage: 'Alert when anomaly detection jobs experience operational issues.',
    }),
    iconClass: 'bell',
    documentationUrl(docLinks) {
      return docLinks.links.ml.alertingRules;
    },
    alertParamsExpression: lazy(() => import('./anomaly_detection_jobs_health_rule_trigger')),
    validate: (alertParams: MlAnomalyDetectionJobsHealthRuleParams) => {
      const validationResult = {
        errors: {
          includeJobs: new Array<string>(),
          testsConfig: new Array<string>(),
        } as Record<keyof MlAnomalyDetectionJobsHealthRuleParams, string[]>,
      };

      // At least one job or group has to be selected.
      if (!alertParams.includeJobs?.jobIds?.length && !alertParams.includeJobs?.groupIds?.length) {
        validationResult.errors.includeJobs.push(
          i18n.translate('xpack.ml.alertTypes.jobsHealthAlertingRule.includeJobs.errorMessage', {
            defaultMessage: 'Job selection is required',
          })
        );
      }

      // `every` is vacuously true for an empty list, so a tests config without any
      // entry has to be excluded explicitly: it does not disable any check.
      const configuredChecks = Object.values(alertParams.testsConfig ?? {});
      if (
        configuredChecks.length > 0 &&
        configuredChecks.every((v) => v?.enabled === false)
      ) {
        validationResult.errors.testsConfig.push(
          i18n.translate('xpack.ml.alertTypes.jobsHealthAlertingRule.testsConfig.errorMessage', {
            defaultMessage: 'At least one health check must be enabled.',
          })
        );
      }

      return validationResult;
    },
    requiresAppContext: false,
    defaultActionMessage: i18n.translate(
      'xpack.ml.alertTypes.jobsHealthAlertingRule.defaultActionMessage',
      {
        defaultMessage: `Anomaly detection jobs health check result:
\\{\\{context.message\\}\\}
- Job IDs: \\{\\{context.jobIds\\}\\}
`,
      }
    ),
  });
}
|
|
@ -0,0 +1,125 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import React, { FC, useCallback } from 'react';
|
||||
import { i18n } from '@kbn/i18n';
|
||||
import { FormattedMessage } from '@kbn/i18n/react';
|
||||
import { EuiFormFieldset, EuiFormRow, EuiSpacer, EuiSwitch } from '@elastic/eui';
|
||||
import { JobsHealthRuleTestsConfig } from '../../../common/types/alerts';
|
||||
import { getResultJobsHealthRuleConfig } from '../../../common/util/alerts';
|
||||
import { HEALTH_CHECK_NAMES } from '../../../common/constants/alerts';
|
||||
|
||||
interface TestsSelectionControlProps {
|
||||
config: JobsHealthRuleTestsConfig;
|
||||
onChange: (update: JobsHealthRuleTestsConfig) => void;
|
||||
errors?: string[];
|
||||
}
|
||||
|
||||
/**
 * Fieldset with the switches for selecting which health checks to perform.
 *
 * Switch states come from the provided config combined with the defaults.
 * Toggling a switch reports the whole config, with the toggled check merged
 * in, through `onChange`.
 */
export const TestsSelectionControl: FC<TestsSelectionControlProps> = ({
  config,
  onChange,
  errors,
}) => {
  const resolvedConfig = getResultJobsHealthRuleConfig(config);
  const hasErrors = !!errors?.length;

  // Merges a partial update into the current config and reports the result.
  const applyUpdate = useCallback(
    (update: Partial<Exclude<JobsHealthRuleTestsConfig, undefined>>) => {
      const currentConfig = config ?? {};
      onChange({
        ...currentConfig,
        ...update,
      });
    },
    [onChange, config]
  );

  const legend = {
    children: i18n.translate('xpack.ml.alertTypes.jobsHealthAlertingRule.testsSelection.legend', {
      defaultMessage: 'Select health checks to perform',
    }),
  };

  return (
    <EuiFormFieldset legend={legend}>
      <EuiFormRow isInvalid={hasErrors} error={errors}>
        <EuiSwitch
          label={HEALTH_CHECK_NAMES.datafeed}
          onChange={() => applyUpdate({ datafeed: { enabled: !resolvedConfig.datafeed.enabled } })}
          checked={resolvedConfig.datafeed.enabled}
        />
      </EuiFormRow>

      <EuiSpacer size="s" />

      {/* The remaining checks are intentionally never rendered. */}
      {false && (
        <>
          <EuiSwitch
            label={
              <FormattedMessage
                id="xpack.ml.alertTypes.jobsHealthAlertingRule.testsSelection.mmlCheck.label"
                defaultMessage="Model memory limit monitoring"
              />
            }
            onChange={() => applyUpdate({ mml: { enabled: !resolvedConfig.mml.enabled } })}
            checked={resolvedConfig.mml.enabled}
          />

          <EuiSpacer size="s" />

          <EuiSwitch
            label={
              <FormattedMessage
                id="xpack.ml.alertTypes.jobsHealthAlertingRule.testsSelection.delayedDataCheck.label"
                defaultMessage="Delayed data"
              />
            }
            onChange={() =>
              applyUpdate({ delayedData: { enabled: !resolvedConfig.delayedData.enabled } })
            }
            checked={resolvedConfig.delayedData.enabled}
          />

          <EuiSpacer size="s" />

          <EuiSwitch
            label={
              <FormattedMessage
                id="xpack.ml.alertTypes.jobsHealthAlertingRule.testsSelection.jobBehindRealtimeCheck.label"
                defaultMessage="Job is running behind real-time"
              />
            }
            onChange={() =>
              applyUpdate({ behindRealtime: { enabled: !resolvedConfig.behindRealtime.enabled } })
            }
            checked={resolvedConfig.behindRealtime.enabled}
          />

          <EuiSpacer size="s" />

          <EuiSwitch
            label={
              <FormattedMessage
                id="xpack.ml.alertTypes.jobsHealthAlertingRule.testsSelection.errorMessagesCheck.label"
                defaultMessage="There are errors in the job messages."
              />
            }
            onChange={() =>
              applyUpdate({ errorMessages: { enabled: !resolvedConfig.errorMessages.enabled } })
            }
            checked={resolvedConfig.errorMessages.enabled}
          />

          <EuiSpacer size="s" />
        </>
      )}
    </EuiFormFieldset>
  );
};
|
|
@ -14,6 +14,7 @@ import type { PluginSetupContract as AlertingSetup } from '../../../alerting/pub
|
|||
import { PLUGIN_ID } from '../../common/constants/app';
|
||||
import { formatExplorerUrl } from '../locator/formatters/anomaly_detection';
|
||||
import { validateLookbackInterval, validateTopNBucket } from './validators';
|
||||
import { registerJobsHealthAlertingRule } from './jobs_health_rule';
|
||||
|
||||
export function registerMlAlerts(
|
||||
triggersActionsUi: TriggersAndActionsUIPublicPluginSetup,
|
||||
|
@ -26,7 +27,7 @@ export function registerMlAlerts(
|
|||
}),
|
||||
iconClass: 'bell',
|
||||
documentationUrl(docLinks) {
|
||||
return `${docLinks.ELASTIC_WEBSITE_URL}guide/en/machine-learning/${docLinks.DOC_LINK_VERSION}/ml-configuring-alerts.html`;
|
||||
return docLinks.links.ml.alertingRules;
|
||||
},
|
||||
alertParamsExpression: lazy(() => import('./ml_anomaly_alert_trigger')),
|
||||
validate: (alertParams: MlAnomalyDetectionAlertParams) => {
|
||||
|
@ -137,6 +138,8 @@ export function registerMlAlerts(
|
|||
),
|
||||
});
|
||||
|
||||
registerJobsHealthAlertingRule(triggersActionsUi, alerting);
|
||||
|
||||
if (alerting) {
|
||||
registerNavigation(alerting);
|
||||
}
|
||||
|
|
|
@ -436,7 +436,7 @@ export function alertingServiceProvider(mlClient: MlClient, datafeedsService: Da
|
|||
|
||||
const jobIds = jobsResponse.map((v) => v.job_id);
|
||||
|
||||
const dataFeeds = await datafeedsService.getDatafeedByJobId(jobIds);
|
||||
const datafeeds = await datafeedsService.getDatafeedByJobId(jobIds);
|
||||
|
||||
const maxBucketInSeconds = resolveMaxTimeInterval(
|
||||
jobsResponse.map((v) => v.analysis_config.bucket_span)
|
||||
|
@ -448,7 +448,7 @@ export function alertingServiceProvider(mlClient: MlClient, datafeedsService: Da
|
|||
}
|
||||
|
||||
const lookBackTimeInterval: string =
|
||||
params.lookbackInterval ?? resolveLookbackInterval(jobsResponse, dataFeeds ?? []);
|
||||
params.lookbackInterval ?? resolveLookbackInterval(jobsResponse, datafeeds ?? []);
|
||||
|
||||
const topNBuckets: number = params.topNBuckets ?? getTopNBuckets(jobsResponse[0]);
|
||||
|
||||
|
|
180
x-pack/plugins/ml/server/lib/alerts/jobs_health_service.test.ts
Normal file
180
x-pack/plugins/ml/server/lib/alerts/jobs_health_service.test.ts
Normal file
|
@ -0,0 +1,180 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import { JobsHealthService, jobsHealthServiceProvider } from './jobs_health_service';
|
||||
import type { DatafeedsService } from '../../models/job_service/datafeeds';
|
||||
import type { Logger } from 'kibana/server';
|
||||
import { MlClient } from '../ml_client';
|
||||
import { MlJob, MlJobStats } from '@elastic/elasticsearch/api/types';
|
||||
|
||||
describe('JobsHealthService', () => {
|
||||
const mlClient = ({
|
||||
getJobs: jest.fn().mockImplementation(({ job_id: jobIds = [] }) => {
|
||||
let jobs: MlJob[] = [];
|
||||
|
||||
if (jobIds.some((v: string) => v === 'test_group')) {
|
||||
jobs = [
|
||||
({
|
||||
job_id: 'test_job_01',
|
||||
} as unknown) as MlJob,
|
||||
({
|
||||
job_id: 'test_job_02',
|
||||
} as unknown) as MlJob,
|
||||
({
|
||||
job_id: 'test_job_03',
|
||||
} as unknown) as MlJob,
|
||||
];
|
||||
}
|
||||
|
||||
if (jobIds[0]?.startsWith('test_job_')) {
|
||||
jobs = [
|
||||
({
|
||||
job_id: jobIds[0],
|
||||
} as unknown) as MlJob,
|
||||
];
|
||||
}
|
||||
|
||||
return Promise.resolve({
|
||||
body: {
|
||||
jobs,
|
||||
},
|
||||
});
|
||||
}),
|
||||
getJobStats: jest.fn().mockImplementation(({ job_id: jobIdsStr }) => {
|
||||
const jobsIds = jobIdsStr.split(',');
|
||||
return Promise.resolve({
|
||||
body: {
|
||||
jobs: jobsIds.map((j: string) => {
|
||||
return {
|
||||
job_id: j,
|
||||
state: j === 'test_job_02' ? 'opened' : 'closed',
|
||||
};
|
||||
}) as MlJobStats,
|
||||
},
|
||||
});
|
||||
}),
|
||||
getDatafeedStats: jest.fn().mockImplementation(({ datafeed_id: datafeedIdsStr }) => {
|
||||
const datafeedIds = datafeedIdsStr.split(',');
|
||||
return Promise.resolve({
|
||||
body: {
|
||||
datafeeds: datafeedIds.map((d: string) => {
|
||||
return {
|
||||
datafeed_id: d,
|
||||
state: d === 'test_datafeed_02' ? 'stopped' : 'started',
|
||||
timing_stats: {
|
||||
job_id: d.replace('datafeed', 'job'),
|
||||
},
|
||||
};
|
||||
}) as MlJobStats,
|
||||
},
|
||||
});
|
||||
}),
|
||||
} as unknown) as jest.Mocked<MlClient>;
|
||||
|
||||
const datafeedsService = ({
|
||||
getDatafeedByJobId: jest.fn().mockImplementation((jobIds: string[]) => {
|
||||
return Promise.resolve(
|
||||
jobIds.map((j) => {
|
||||
return {
|
||||
datafeed_id: j.replace('job', 'datafeed'),
|
||||
};
|
||||
})
|
||||
);
|
||||
}),
|
||||
} as unknown) as jest.Mocked<DatafeedsService>;
|
||||
|
||||
const logger = ({
|
||||
warn: jest.fn(),
|
||||
info: jest.fn(),
|
||||
debug: jest.fn(),
|
||||
} as unknown) as jest.Mocked<Logger>;
|
||||
|
||||
const jobHealthService: JobsHealthService = jobsHealthServiceProvider(
|
||||
mlClient,
|
||||
datafeedsService,
|
||||
logger
|
||||
);
|
||||
|
||||
beforeEach(() => {});
|
||||
|
||||
afterEach(() => {
|
||||
jest.clearAllMocks();
|
||||
});
|
||||
|
||||
test('returns empty results when no jobs provided', async () => {
|
||||
// act
|
||||
const executionResult = await jobHealthService.getTestsResults('testRule', {
|
||||
testsConfig: null,
|
||||
includeJobs: {
|
||||
jobIds: ['*'],
|
||||
groupIds: [],
|
||||
},
|
||||
excludeJobs: null,
|
||||
});
|
||||
expect(logger.warn).toHaveBeenCalledWith('Rule "testRule" does not have associated jobs.');
|
||||
expect(datafeedsService.getDatafeedByJobId).not.toHaveBeenCalled();
|
||||
expect(executionResult).toEqual([]);
|
||||
});
|
||||
|
||||
test('returns empty results and does not perform datafeed check when test is disabled', async () => {
|
||||
const executionResult = await jobHealthService.getTestsResults('testRule', {
|
||||
testsConfig: {
|
||||
datafeed: {
|
||||
enabled: false,
|
||||
},
|
||||
behindRealtime: null,
|
||||
delayedData: null,
|
||||
errorMessages: null,
|
||||
mml: null,
|
||||
},
|
||||
includeJobs: {
|
||||
jobIds: ['test_job_01'],
|
||||
groupIds: [],
|
||||
},
|
||||
excludeJobs: null,
|
||||
});
|
||||
expect(logger.warn).not.toHaveBeenCalled();
|
||||
expect(logger.debug).toHaveBeenCalledWith(`Performing health checks for job IDs: test_job_01`);
|
||||
expect(datafeedsService.getDatafeedByJobId).not.toHaveBeenCalled();
|
||||
expect(executionResult).toEqual([]);
|
||||
});
|
||||
|
||||
test('returns results based on provided selection', async () => {
|
||||
const executionResult = await jobHealthService.getTestsResults('testRule_03', {
|
||||
testsConfig: null,
|
||||
includeJobs: {
|
||||
jobIds: [],
|
||||
groupIds: ['test_group'],
|
||||
},
|
||||
excludeJobs: {
|
||||
jobIds: ['test_job_03'],
|
||||
groupIds: [],
|
||||
},
|
||||
});
|
||||
expect(logger.warn).not.toHaveBeenCalled();
|
||||
expect(logger.debug).toHaveBeenCalledWith(
|
||||
`Performing health checks for job IDs: test_job_01, test_job_02`
|
||||
);
|
||||
expect(datafeedsService.getDatafeedByJobId).toHaveBeenCalledWith([
|
||||
'test_job_01',
|
||||
'test_job_02',
|
||||
]);
|
||||
expect(mlClient.getJobStats).toHaveBeenCalledWith({ job_id: 'test_job_01,test_job_02' });
|
||||
expect(mlClient.getDatafeedStats).toHaveBeenCalledWith({
|
||||
datafeed_id: 'test_datafeed_01,test_datafeed_02',
|
||||
});
|
||||
expect(executionResult).toEqual([
|
||||
{
|
||||
name: 'Datafeed is not started',
|
||||
context: {
|
||||
jobIds: ['test_job_02'],
|
||||
message: 'Datafeed is not started for the following jobs:',
|
||||
},
|
||||
},
|
||||
]);
|
||||
});
|
||||
});
|
185
x-pack/plugins/ml/server/lib/alerts/jobs_health_service.ts
Normal file
185
x-pack/plugins/ml/server/lib/alerts/jobs_health_service.ts
Normal file
|
@ -0,0 +1,185 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import { KibanaRequest, SavedObjectsClientContract } from 'kibana/server';
|
||||
import { i18n } from '@kbn/i18n';
|
||||
import { Logger } from 'kibana/server';
|
||||
import { MlJobState } from '@elastic/elasticsearch/api/types';
|
||||
import { MlClient } from '../ml_client';
|
||||
import {
|
||||
AnomalyDetectionJobsHealthRuleParams,
|
||||
JobSelection,
|
||||
} from '../../routes/schemas/alerting_schema';
|
||||
import { datafeedsProvider, DatafeedsService } from '../../models/job_service/datafeeds';
|
||||
import { ALL_JOBS_SELECTION, HEALTH_CHECK_NAMES } from '../../../common/constants/alerts';
|
||||
import { DatafeedStats } from '../../../common/types/anomaly_detection_jobs';
|
||||
import { GetGuards } from '../../shared_services/shared_services';
|
||||
import { AnomalyDetectionJobsHealthAlertContext } from './register_jobs_monitoring_rule_type';
|
||||
import { getResultJobsHealthRuleConfig } from '../../../common/util/alerts';
|
||||
|
||||
/**
 * Outcome of a single health check.
 */
interface TestResult {
  /** Name of the health check that produced this result. */
  name: string;
  /** Context handed over to the alert instance when actions are scheduled. */
  context: AnomalyDetectionJobsHealthAlertContext;
}

/** Results of all executed health checks. */
type TestsResults = Array<TestResult>;

/** Job details attached to each datafeed stats entry. */
interface DatafeedJobInfo {
  job_id: string;
  job_state: MlJobState;
}

/** Datafeed stats entries enriched with the ID and state of the related job. */
type NotStartedDatafeedResponse = Array<DatafeedStats & DatafeedJobInfo>;
|
||||
|
||||
/**
 * Creates the service that performs health checks for anomaly detection jobs.
 *
 * @param mlClient - client used to fetch jobs, job stats and datafeed stats
 * @param datafeedsService - service used to resolve datafeeds for job IDs
 * @param logger - logger used for warnings and debug output
 * @returns an object exposing `getNotStartedDatafeeds` and `getTestsResults`
 */
export function jobsHealthServiceProvider(
  mlClient: MlClient,
  datafeedsService: DatafeedsService,
  logger: Logger
) {
  /**
   * Extracts result list of job ids based on included and excluded selection of jobs and groups.
   *
   * @param includeJobs - job and group IDs to include
   * @param excludeJobs - optional job and group IDs to remove from the result
   * @returns IDs of the included jobs that are not part of the excluded selection
   */
  const getResultJobIds = async (includeJobs: JobSelection, excludeJobs?: JobSelection | null) => {
    const jobAndGroupIds = [...(includeJobs.jobIds ?? []), ...(includeJobs.groupIds ?? [])];

    const includeAllJobs = jobAndGroupIds.includes(ALL_JOBS_SELECTION);

    // Extract jobs from group ids and make sure provided jobs assigned to a current space
    const jobsResponse = (
      await mlClient.getJobs({
        ...(includeAllJobs ? {} : { job_id: jobAndGroupIds }),
      })
    ).body.jobs;

    const resultJobIds = jobsResponse.map((v) => v.job_id);

    const excludedJobAndGroupIds = [
      ...(excludeJobs?.jobIds ?? []),
      ...(excludeJobs?.groupIds ?? []),
    ];

    // Nothing to exclude, so skip the extra request
    if (excludedJobAndGroupIds.length === 0) {
      return resultJobIds;
    }

    // Resolve excluded groups to the actual job IDs
    const excludedJobsResponse = (
      await mlClient.getJobs({
        job_id: excludedJobAndGroupIds,
      })
    ).body.jobs;

    const excludedJobsIds: Set<string> = new Set(excludedJobsResponse.map((v) => v.job_id));

    return resultJobIds.filter((v) => !excludedJobsIds.has(v));
  };

  return {
    /**
     * Gets not started datafeeds for opened jobs.
     *
     * @param jobIds - IDs of the jobs to check
     * @returns datafeed stats (with `job_id` and `job_state` attached) for opened jobs whose
     * datafeed is not started, or nothing when the datafeeds service returns no value
     */
    async getNotStartedDatafeeds(jobIds: string[]): Promise<NotStartedDatafeedResponse | void> {
      const datafeeds = await datafeedsService.getDatafeedByJobId(jobIds);

      if (!datafeeds) {
        return;
      }

      // Without datafeeds there is nothing to report; also avoids requesting
      // datafeed stats with an empty ID list.
      if (datafeeds.length === 0) {
        return [];
      }

      // Both stats requests are independent, hence executed in parallel
      const [
        {
          body: { jobs: jobsStats },
        },
        {
          body: { datafeeds: datafeedsStats },
        },
      ] = await Promise.all([
        mlClient.getJobStats({ job_id: jobIds.join(',') }),
        mlClient.getDatafeedStats({
          datafeed_id: datafeeds.map((d) => d.datafeed_id).join(','),
        }),
      ]);

      const jobStateById = new Map(
        jobsStats.map((jobStats) => [jobStats.job_id, jobStats.state] as const)
      );

      // match datafeed stats with the job ids
      return (datafeedsStats as DatafeedStats[])
        .map((datafeedStats) => {
          const jobId = datafeedStats.timing_stats.job_id;
          // Jobs without stats are treated as failed and get filtered out below
          const jobState: MlJobState = jobStateById.get(jobId) ?? 'failed';
          return {
            ...datafeedStats,
            job_id: jobId,
            job_state: jobState,
          };
        })
        .filter((datafeedStat) => {
          // Find opened jobs with not started datafeeds
          return datafeedStat.job_state === 'opened' && datafeedStat.state !== 'started';
        });
    },
    /**
     * Retrieves report grouped by test.
     *
     * @param ruleInstanceName - rule name, used in the warning logged when no jobs are resolved
     * @param params - rule parameters: tests configuration and included / excluded jobs
     * @returns one entry per health check that detected an issue; empty when no jobs
     * are resolved or no issues are found
     */
    async getTestsResults(
      ruleInstanceName: string,
      { testsConfig, includeJobs, excludeJobs }: AnomalyDetectionJobsHealthRuleParams
    ): Promise<TestsResults> {
      const config = getResultJobsHealthRuleConfig(testsConfig);

      const results: TestsResults = [];

      const jobIds = await getResultJobIds(includeJobs, excludeJobs);

      if (jobIds.length === 0) {
        logger.warn(`Rule "${ruleInstanceName}" does not have associated jobs.`);
        return results;
      }

      logger.debug(`Performing health checks for job IDs: ${jobIds.join(', ')}`);

      if (config.datafeed.enabled) {
        const response = await this.getNotStartedDatafeeds(jobIds);
        if (response && response.length > 0) {
          results.push({
            name: HEALTH_CHECK_NAMES.datafeed,
            context: {
              // Report every affected job only once
              jobIds: [...new Set(response.map((v) => v.job_id))],
              message: i18n.translate(
                'xpack.ml.alertTypes.jobsHealthAlertingRule.datafeedStateMessage',
                {
                  defaultMessage: 'Datafeed is not started for the following jobs:',
                }
              ),
            },
          });
        }
      }

      return results;
    },
  };
}
|
||||
|
||||
export type JobsHealthService = ReturnType<typeof jobsHealthServiceProvider>;
|
||||
|
||||
/**
 * Builds the provider of the jobs health service whose calls are wrapped with
 * the license and ML capabilities guards.
 *
 * @param getGuards - factory of the guards for a given request and saved objects client
 * @returns an object with the `jobsHealthServiceProvider` factory
 */
export function getJobsHealthServiceProvider(getGuards: GetGuards) {
  return {
    jobsHealthServiceProvider(
      savedObjectsClient: SavedObjectsClientContract,
      request: KibanaRequest,
      logger: Logger
    ) {
      // Runs the health checks once the full license and the `canGetJobs` capability are verified
      const getTestsResults = async (
        ...args: Parameters<JobsHealthService['getTestsResults']>
      ): ReturnType<JobsHealthService['getTestsResults']> => {
        const guards = getGuards(request, savedObjectsClient)
          .isFullLicense()
          .hasMlCapabilities(['canGetJobs']);

        return await guards.ok(({ mlClient, scopedClient }) => {
          const healthService = jobsHealthServiceProvider(
            mlClient,
            datafeedsProvider(scopedClient, mlClient),
            logger
          );
          return healthService.getTestsResults(...args);
        });
      };

      return { getTestsResults };
    },
  };
}
|
||||
|
||||
export type JobsHealthServiceProvider = ReturnType<typeof getJobsHealthServiceProvider>;
|
|
@ -7,11 +7,7 @@
|
|||
|
||||
import { i18n } from '@kbn/i18n';
|
||||
import { KibanaRequest } from 'kibana/server';
|
||||
import {
|
||||
ML_ALERT_TYPES,
|
||||
ML_ALERT_TYPES_CONFIG,
|
||||
AnomalyScoreMatchGroupId,
|
||||
} from '../../../common/constants/alerts';
|
||||
import { ML_ALERT_TYPES } from '../../../common/constants/alerts';
|
||||
import { PLUGIN_ID } from '../../../common/constants/app';
|
||||
import { MINIMUM_FULL_LICENSE } from '../../../common/license';
|
||||
import {
|
||||
|
@ -21,13 +17,12 @@ import {
|
|||
import { RegisterAlertParams } from './register_ml_alerts';
|
||||
import { InfluencerAnomalyAlertDoc, RecordAnomalyAlertDoc } from '../../../common/types/alerts';
|
||||
import {
|
||||
ActionGroup,
|
||||
AlertInstanceContext,
|
||||
AlertInstanceState,
|
||||
AlertTypeState,
|
||||
} from '../../../../alerting/common';
|
||||
|
||||
const alertTypeConfig = ML_ALERT_TYPES_CONFIG[ML_ALERT_TYPES.ANOMALY_DETECTION];
|
||||
|
||||
export type AnomalyDetectionAlertContext = {
|
||||
name: string;
|
||||
jobIds: string[];
|
||||
|
@ -40,6 +35,17 @@ export type AnomalyDetectionAlertContext = {
|
|||
anomalyExplorerUrl: string;
|
||||
} & AlertInstanceContext;
|
||||
|
||||
export const ANOMALY_SCORE_MATCH_GROUP_ID = 'anomaly_score_match';
|
||||
|
||||
export type AnomalyScoreMatchGroupId = typeof ANOMALY_SCORE_MATCH_GROUP_ID;
|
||||
|
||||
export const THRESHOLD_MET_GROUP: ActionGroup<AnomalyScoreMatchGroupId> = {
|
||||
id: ANOMALY_SCORE_MATCH_GROUP_ID,
|
||||
name: i18n.translate('xpack.ml.anomalyDetectionAlert.actionGroupName', {
|
||||
defaultMessage: 'Anomaly score matched the condition',
|
||||
}),
|
||||
};
|
||||
|
||||
export function registerAnomalyDetectionAlertType({
|
||||
alerting,
|
||||
mlSharedServices,
|
||||
|
@ -53,9 +59,11 @@ export function registerAnomalyDetectionAlertType({
|
|||
AnomalyScoreMatchGroupId
|
||||
>({
|
||||
id: ML_ALERT_TYPES.ANOMALY_DETECTION,
|
||||
name: alertTypeConfig.name,
|
||||
actionGroups: alertTypeConfig.actionGroups,
|
||||
defaultActionGroupId: alertTypeConfig.defaultActionGroupId,
|
||||
name: i18n.translate('xpack.ml.anomalyDetectionAlert.name', {
|
||||
defaultMessage: 'Anomaly detection alert',
|
||||
}),
|
||||
actionGroups: [THRESHOLD_MET_GROUP],
|
||||
defaultActionGroupId: ANOMALY_SCORE_MATCH_GROUP_ID,
|
||||
validate: {
|
||||
params: mlAnomalyDetectionAlertParams,
|
||||
},
|
||||
|
@ -76,7 +84,7 @@ export function registerAnomalyDetectionAlertType({
|
|||
{
|
||||
name: 'jobIds',
|
||||
description: i18n.translate('xpack.ml.alertContext.jobIdsDescription', {
|
||||
defaultMessage: 'List of job IDs that triggered the alert instance',
|
||||
defaultMessage: 'List of job IDs that triggered the alert',
|
||||
}),
|
||||
},
|
||||
{
|
||||
|
@ -132,7 +140,7 @@ export function registerAnomalyDetectionAlertType({
|
|||
if (executionResult) {
|
||||
const alertInstanceName = executionResult.name;
|
||||
const alertInstance = services.alertInstanceFactory(alertInstanceName);
|
||||
alertInstance.scheduleActions(alertTypeConfig.defaultActionGroupId, executionResult);
|
||||
alertInstance.scheduleActions(ANOMALY_SCORE_MATCH_GROUP_ID, executionResult);
|
||||
}
|
||||
},
|
||||
});
|
||||
|
|
|
@ -0,0 +1,109 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import { i18n } from '@kbn/i18n';
|
||||
import { KibanaRequest } from 'kibana/server';
|
||||
import { ML_ALERT_TYPES } from '../../../common/constants/alerts';
|
||||
import { PLUGIN_ID } from '../../../common/constants/app';
|
||||
import { MINIMUM_FULL_LICENSE } from '../../../common/license';
|
||||
import {
|
||||
anomalyDetectionJobsHealthRuleParams,
|
||||
AnomalyDetectionJobsHealthRuleParams,
|
||||
} from '../../routes/schemas/alerting_schema';
|
||||
import { RegisterAlertParams } from './register_ml_alerts';
|
||||
import {
|
||||
ActionGroup,
|
||||
AlertInstanceContext,
|
||||
AlertInstanceState,
|
||||
AlertTypeState,
|
||||
} from '../../../../alerting/common';
|
||||
|
||||
export type AnomalyDetectionJobsHealthAlertContext = {
|
||||
jobIds: string[];
|
||||
message: string;
|
||||
} & AlertInstanceContext;
|
||||
|
||||
export const ANOMALY_DETECTION_JOB_REALTIME_ISSUE = 'anomaly_detection_realtime_issue';
|
||||
|
||||
export type AnomalyDetectionJobRealtimeIssue = typeof ANOMALY_DETECTION_JOB_REALTIME_ISSUE;
|
||||
|
||||
export const REALTIME_ISSUE_DETECTED: ActionGroup<AnomalyDetectionJobRealtimeIssue> = {
|
||||
id: ANOMALY_DETECTION_JOB_REALTIME_ISSUE,
|
||||
name: i18n.translate('xpack.ml.jobsHealthAlertingRule.actionGroupName', {
|
||||
defaultMessage: 'Real-time issue detected',
|
||||
}),
|
||||
};
|
||||
|
||||
/**
 * Registers the anomaly detection jobs health rule type with the alerting plugin.
 *
 * @param alerting - alerting plugin setup contract used to register the rule type
 * @param mlServicesProviders - ML services providers, used to obtain the jobs health service
 * @param logger - logger passed to the jobs health service and used by the executor
 */
export function registerJobsMonitoringRuleType({
  alerting,
  mlServicesProviders,
  logger,
}: RegisterAlertParams) {
  alerting.registerType<
    AnomalyDetectionJobsHealthRuleParams,
    never, // Only use if defining useSavedObjectReferences hook
    AlertTypeState,
    AlertInstanceState,
    AnomalyDetectionJobsHealthAlertContext,
    AnomalyDetectionJobRealtimeIssue
  >({
    id: ML_ALERT_TYPES.AD_JOBS_HEALTH,
    name: i18n.translate('xpack.ml.jobsHealthAlertingRule.name', {
      defaultMessage: 'Anomaly detection jobs health',
    }),
    actionGroups: [REALTIME_ISSUE_DETECTED],
    defaultActionGroupId: ANOMALY_DETECTION_JOB_REALTIME_ISSUE,
    validate: {
      params: anomalyDetectionJobsHealthRuleParams,
    },
    actionVariables: {
      context: [
        {
          name: 'jobIds',
          description: i18n.translate(
            'xpack.ml.alertTypes.jobsHealthAlertingRule.alertContext.jobIdsDescription',
            {
              defaultMessage: 'List of job IDs that triggered the alert',
            }
          ),
        },
        {
          name: 'message',
          description: i18n.translate(
            'xpack.ml.alertTypes.jobsHealthAlertingRule.alertContext.messageDescription',
            {
              defaultMessage: 'Alert info message',
            }
          ),
        },
      ],
    },
    producer: PLUGIN_ID,
    minimumLicenseRequired: MINIMUM_FULL_LICENSE,
    isExportable: true,
    /**
     * Runs the health checks and schedules actions for every check that reported an issue.
     */
    async executor({ services, params, name }) {
      // NOTE(review): an empty object is cast to KibanaRequest here; presumably the
      // guards do not read request data in this context — confirm.
      const fakeRequest = {} as KibanaRequest;
      const { getTestsResults } = mlServicesProviders.jobsHealthServiceProvider(
        services.savedObjectsClient,
        fakeRequest,
        logger
      );
      const executionResult = await getTestsResults(name, params);

      if (executionResult.length === 0) {
        return;
      }

      logger.info(
        `Scheduling actions for tests: ${executionResult.map((v) => v.name).join(', ')}`
      );

      // One alert instance per health check, named after the check
      executionResult.forEach(({ name: alertInstanceName, context }) => {
        const alertInstance = services.alertInstanceFactory(alertInstanceName);
        alertInstance.scheduleActions(ANOMALY_DETECTION_JOB_REALTIME_ISSUE, context);
      });
    },
  });
}
|
|
@ -9,13 +9,17 @@ import { Logger } from 'kibana/server';
|
|||
import { AlertingPlugin } from '../../../../alerting/server';
|
||||
import { registerAnomalyDetectionAlertType } from './register_anomaly_detection_alert_type';
|
||||
import { SharedServices } from '../../shared_services';
|
||||
import { registerJobsMonitoringRuleType } from './register_jobs_monitoring_rule_type';
|
||||
import { MlServicesProviders } from '../../shared_services/shared_services';
|
||||
|
||||
export interface RegisterAlertParams {
|
||||
alerting: AlertingPlugin['setup'];
|
||||
logger: Logger;
|
||||
mlSharedServices: SharedServices;
|
||||
mlServicesProviders: MlServicesProviders;
|
||||
}
|
||||
|
||||
/**
 * Registers all ML rule types: the anomaly detection alert type first,
 * then the jobs monitoring rule type.
 *
 * @param params - dependencies handed over to each registration function
 */
export function registerMlAlerts(params: RegisterAlertParams) {
  const registrars = [registerAnomalyDetectionAlertType, registerJobsMonitoringRuleType];

  for (const register of registrars) {
    register(params);
  }
}
|
||||
|
|
|
@ -196,7 +196,7 @@ export class MlServerPlugin
|
|||
|
||||
initMlServerLog({ log: this.log });
|
||||
|
||||
const sharedServices = createSharedServices(
|
||||
const { internalServicesProviders, sharedServicesProviders } = createSharedServices(
|
||||
this.mlLicense,
|
||||
getSpaces,
|
||||
plugins.cloud,
|
||||
|
@ -211,7 +211,8 @@ export class MlServerPlugin
|
|||
registerMlAlerts({
|
||||
alerting: plugins.alerting,
|
||||
logger: this.log,
|
||||
mlSharedServices: sharedServices,
|
||||
mlSharedServices: sharedServicesProviders,
|
||||
mlServicesProviders: internalServicesProviders,
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -219,7 +220,7 @@ export class MlServerPlugin
|
|||
registerCollector(plugins.usageCollection, this.kibanaIndexConfig.kibana.index);
|
||||
}
|
||||
|
||||
return { ...sharedServices };
|
||||
return sharedServicesProviders;
|
||||
}
|
||||
|
||||
public start(coreStart: CoreStart): MlPluginStart {
|
||||
|
|
|
@ -10,22 +10,24 @@ import { i18n } from '@kbn/i18n';
|
|||
import { ALERT_PREVIEW_SAMPLE_SIZE } from '../../../common/constants/alerts';
|
||||
import { ANOMALY_RESULT_TYPE } from '../../../common/constants/anomalies';
|
||||
|
||||
export const mlAnomalyDetectionAlertParams = schema.object({
|
||||
jobSelection: schema.object(
|
||||
{
|
||||
jobIds: schema.arrayOf(schema.string(), { defaultValue: [] }),
|
||||
groupIds: schema.arrayOf(schema.string(), { defaultValue: [] }),
|
||||
const jobsSelectionSchema = schema.object(
|
||||
{
|
||||
jobIds: schema.arrayOf(schema.string(), { defaultValue: [] }),
|
||||
groupIds: schema.arrayOf(schema.string(), { defaultValue: [] }),
|
||||
},
|
||||
{
|
||||
validate: (v) => {
|
||||
if (!v.jobIds?.length && !v.groupIds?.length) {
|
||||
return i18n.translate('xpack.ml.alertTypes.anomalyDetection.jobSelection.errorMessage', {
|
||||
defaultMessage: 'Job selection is required',
|
||||
});
|
||||
}
|
||||
},
|
||||
{
|
||||
validate: (v) => {
|
||||
if (!v.jobIds?.length && !v.groupIds?.length) {
|
||||
return i18n.translate('xpack.ml.alertTypes.anomalyDetection.jobSelection.errorMessage', {
|
||||
defaultMessage: 'Job selection is required',
|
||||
});
|
||||
}
|
||||
},
|
||||
}
|
||||
),
|
||||
}
|
||||
);
|
||||
|
||||
export const mlAnomalyDetectionAlertParams = schema.object({
|
||||
jobSelection: jobsSelectionSchema,
|
||||
/** Anomaly score threshold */
|
||||
severity: schema.number({ min: 0, max: 100 }),
|
||||
/** Result type to alert upon */
|
||||
|
@ -58,3 +60,47 @@ export type MlAnomalyDetectionAlertParams = TypeOf<typeof mlAnomalyDetectionAler
|
|||
export type MlAnomalyDetectionAlertPreviewRequest = TypeOf<
|
||||
typeof mlAnomalyDetectionAlertPreviewRequest
|
||||
>;
|
||||
|
||||
/** Creates the `enabled` flag schema used by every health check; defaults to `true`. */
const healthCheckEnabledFlag = () => schema.boolean({ defaultValue: true });

/**
 * Params schema of the anomaly detection jobs health rule:
 * included jobs, optional excluded jobs and optional per-check configuration.
 */
export const anomalyDetectionJobsHealthRuleParams = schema.object({
  includeJobs: jobsSelectionSchema,
  excludeJobs: schema.nullable(jobsSelectionSchema),
  testsConfig: schema.nullable(
    schema.object({
      datafeed: schema.nullable(schema.object({ enabled: healthCheckEnabledFlag() })),
      mml: schema.nullable(schema.object({ enabled: healthCheckEnabledFlag() })),
      delayedData: schema.nullable(
        schema.object({
          enabled: healthCheckEnabledFlag(),
          docsCount: schema.nullable(schema.number()),
          timeInterval: schema.nullable(schema.string()),
        })
      ),
      behindRealtime: schema.nullable(
        schema.object({
          enabled: healthCheckEnabledFlag(),
          timeInterval: schema.nullable(schema.string()),
        })
      ),
      errorMessages: schema.nullable(schema.object({ enabled: healthCheckEnabledFlag() })),
    })
  ),
});
|
||||
|
||||
export type AnomalyDetectionJobsHealthRuleParams = TypeOf<
|
||||
typeof anomalyDetectionJobsHealthRuleParams
|
||||
>;
|
||||
|
||||
export type TestsConfig = AnomalyDetectionJobsHealthRuleParams['testsConfig'];
|
||||
export type JobSelection = AnomalyDetectionJobsHealthRuleParams['includeJobs'];
|
||||
|
|
|
@ -30,6 +30,10 @@ import {
|
|||
getAlertingServiceProvider,
|
||||
MlAlertingServiceProvider,
|
||||
} from './providers/alerting_service';
|
||||
import {
|
||||
getJobsHealthServiceProvider,
|
||||
JobsHealthServiceProvider,
|
||||
} from '../lib/alerts/jobs_health_service';
|
||||
|
||||
export type SharedServices = JobServiceProvider &
|
||||
AnomalyDetectorsProvider &
|
||||
|
@ -38,6 +42,8 @@ export type SharedServices = JobServiceProvider &
|
|||
ResultsServiceProvider &
|
||||
MlAlertingServiceProvider;
|
||||
|
||||
export type MlServicesProviders = JobsHealthServiceProvider;
|
||||
|
||||
interface Guards {
|
||||
isMinimumLicense(): Guards;
|
||||
isFullLicense(): Guards;
|
||||
|
@ -71,7 +77,10 @@ export function createSharedServices(
|
|||
getClusterClient: () => IClusterClient | null,
|
||||
getInternalSavedObjectsClient: () => SavedObjectsClientContract | null,
|
||||
isMlReady: () => Promise<void>
|
||||
): SharedServices {
|
||||
): {
|
||||
sharedServicesProviders: SharedServices;
|
||||
internalServicesProviders: MlServicesProviders;
|
||||
} {
|
||||
const { isFullLicense, isMinimumLicense } = licenseChecks(mlLicense);
|
||||
function getGuards(
|
||||
request: KibanaRequest,
|
||||
|
@ -118,12 +127,23 @@ export function createSharedServices(
|
|||
}
|
||||
|
||||
return {
|
||||
...getJobServiceProvider(getGuards),
|
||||
...getAnomalyDetectorsProvider(getGuards),
|
||||
...getModulesProvider(getGuards),
|
||||
...getResultsServiceProvider(getGuards),
|
||||
...getMlSystemProvider(getGuards, mlLicense, getSpaces, cloud, resolveMlCapabilities),
|
||||
...getAlertingServiceProvider(getGuards),
|
||||
/**
|
||||
* Exposed providers for shared services used by other plugins
|
||||
*/
|
||||
sharedServicesProviders: {
|
||||
...getJobServiceProvider(getGuards),
|
||||
...getAnomalyDetectorsProvider(getGuards),
|
||||
...getModulesProvider(getGuards),
|
||||
...getResultsServiceProvider(getGuards),
|
||||
...getMlSystemProvider(getGuards, mlLicense, getSpaces, cloud, resolveMlCapabilities),
|
||||
...getAlertingServiceProvider(getGuards),
|
||||
},
|
||||
/**
|
||||
* Services providers for ML internal usage
|
||||
*/
|
||||
internalServicesProviders: {
|
||||
...getJobsHealthServiceProvider(getGuards),
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue