mirror of
https://github.com/elastic/kibana.git
synced 2025-04-23 09:19:04 -04:00
[ML] Update alerts-as-data payload for Anomaly detection health and Transform health rules (#176307)
This commit is contained in:
parent
2598b81704
commit
ec8e464251
13 changed files with 672 additions and 94 deletions
|
@ -0,0 +1,111 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0 and the Server Side Public License, v 1; you may not use this file except
|
||||
* in compliance with, at your election, the Elastic License 2.0 or the Server
|
||||
* Side Public License, v 1.
|
||||
*/
|
||||
// ---------------------------------- WARNING ----------------------------------
|
||||
// this file was generated, and should not be edited by hand
|
||||
// ---------------------------------- WARNING ----------------------------------
|
||||
import * as rt from 'io-ts';
|
||||
import { Either } from 'fp-ts/lib/Either';
|
||||
import { AlertSchema } from './alert_schema';
|
||||
const ISO_DATE_PATTERN = /^d{4}-d{2}-d{2}Td{2}:d{2}:d{2}.d{3}Z$/;
|
||||
export const IsoDateString = new rt.Type<string, string, unknown>(
|
||||
'IsoDateString',
|
||||
rt.string.is,
|
||||
(input, context): Either<rt.Errors, string> => {
|
||||
if (typeof input === 'string' && ISO_DATE_PATTERN.test(input)) {
|
||||
return rt.success(input);
|
||||
} else {
|
||||
return rt.failure(input, context);
|
||||
}
|
||||
},
|
||||
rt.identity
|
||||
);
|
||||
export type IsoDateStringC = typeof IsoDateString;
|
||||
export const schemaUnknown = rt.unknown;
|
||||
export const schemaUnknownArray = rt.array(rt.unknown);
|
||||
export const schemaString = rt.string;
|
||||
export const schemaStringArray = rt.array(schemaString);
|
||||
export const schemaNumber = rt.number;
|
||||
export const schemaNumberArray = rt.array(schemaNumber);
|
||||
export const schemaDate = rt.union([IsoDateString, schemaNumber]);
|
||||
export const schemaDateArray = rt.array(schemaDate);
|
||||
export const schemaDateRange = rt.partial({
|
||||
gte: schemaDate,
|
||||
lte: schemaDate,
|
||||
});
|
||||
export const schemaDateRangeArray = rt.array(schemaDateRange);
|
||||
export const schemaStringOrNumber = rt.union([schemaString, schemaNumber]);
|
||||
export const schemaStringOrNumberArray = rt.array(schemaStringOrNumber);
|
||||
export const schemaBoolean = rt.boolean;
|
||||
export const schemaBooleanArray = rt.array(schemaBoolean);
|
||||
const schemaGeoPointCoords = rt.type({
|
||||
type: schemaString,
|
||||
coordinates: schemaNumberArray,
|
||||
});
|
||||
const schemaGeoPointString = schemaString;
|
||||
const schemaGeoPointLatLon = rt.type({
|
||||
lat: schemaNumber,
|
||||
lon: schemaNumber,
|
||||
});
|
||||
const schemaGeoPointLocation = rt.type({
|
||||
location: schemaNumberArray,
|
||||
});
|
||||
const schemaGeoPointLocationString = rt.type({
|
||||
location: schemaString,
|
||||
});
|
||||
export const schemaGeoPoint = rt.union([
|
||||
schemaGeoPointCoords,
|
||||
schemaGeoPointString,
|
||||
schemaGeoPointLatLon,
|
||||
schemaGeoPointLocation,
|
||||
schemaGeoPointLocationString,
|
||||
]);
|
||||
export const schemaGeoPointArray = rt.array(schemaGeoPoint);
|
||||
// prettier-ignore
|
||||
const MlAnomalyDetectionHealthAlertRequired = rt.type({
|
||||
});
|
||||
// prettier-ignore
|
||||
const MlAnomalyDetectionHealthAlertOptional = rt.partial({
|
||||
'kibana.alert.datafeed_results': rt.array(
|
||||
rt.partial({
|
||||
datafeed_id: schemaString,
|
||||
datafeed_state: schemaString,
|
||||
job_id: schemaString,
|
||||
job_state: schemaString,
|
||||
})
|
||||
),
|
||||
'kibana.alert.delayed_data_results': rt.array(
|
||||
rt.partial({
|
||||
annotation: schemaString,
|
||||
end_timestamp: schemaDate,
|
||||
job_id: schemaString,
|
||||
missed_docs_count: schemaStringOrNumber,
|
||||
})
|
||||
),
|
||||
'kibana.alert.job_errors_results': rt.array(
|
||||
rt.partial({
|
||||
errors: schemaUnknown,
|
||||
job_id: schemaString,
|
||||
})
|
||||
),
|
||||
'kibana.alert.mml_results': rt.array(
|
||||
rt.partial({
|
||||
job_id: schemaString,
|
||||
log_time: schemaDate,
|
||||
memory_status: schemaString,
|
||||
model_bytes: schemaStringOrNumber,
|
||||
model_bytes_exceeded: schemaStringOrNumber,
|
||||
model_bytes_memory_limit: schemaStringOrNumber,
|
||||
peak_model_bytes: schemaStringOrNumber,
|
||||
})
|
||||
),
|
||||
});
|
||||
|
||||
// prettier-ignore
|
||||
export const MlAnomalyDetectionHealthAlertSchema = rt.intersection([MlAnomalyDetectionHealthAlertRequired, MlAnomalyDetectionHealthAlertOptional, AlertSchema]);
|
||||
// prettier-ignore
|
||||
export type MlAnomalyDetectionHealthAlert = rt.TypeOf<typeof MlAnomalyDetectionHealthAlertSchema>;
|
|
@ -0,0 +1,88 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0 and the Server Side Public License, v 1; you may not use this file except
|
||||
* in compliance with, at your election, the Elastic License 2.0 or the Server
|
||||
* Side Public License, v 1.
|
||||
*/
|
||||
// ---------------------------------- WARNING ----------------------------------
|
||||
// this file was generated, and should not be edited by hand
|
||||
// ---------------------------------- WARNING ----------------------------------
|
||||
import * as rt from 'io-ts';
|
||||
import { Either } from 'fp-ts/lib/Either';
|
||||
import { AlertSchema } from './alert_schema';
|
||||
const ISO_DATE_PATTERN = /^d{4}-d{2}-d{2}Td{2}:d{2}:d{2}.d{3}Z$/;
|
||||
export const IsoDateString = new rt.Type<string, string, unknown>(
|
||||
'IsoDateString',
|
||||
rt.string.is,
|
||||
(input, context): Either<rt.Errors, string> => {
|
||||
if (typeof input === 'string' && ISO_DATE_PATTERN.test(input)) {
|
||||
return rt.success(input);
|
||||
} else {
|
||||
return rt.failure(input, context);
|
||||
}
|
||||
},
|
||||
rt.identity
|
||||
);
|
||||
export type IsoDateStringC = typeof IsoDateString;
|
||||
export const schemaUnknown = rt.unknown;
|
||||
export const schemaUnknownArray = rt.array(rt.unknown);
|
||||
export const schemaString = rt.string;
|
||||
export const schemaStringArray = rt.array(schemaString);
|
||||
export const schemaNumber = rt.number;
|
||||
export const schemaNumberArray = rt.array(schemaNumber);
|
||||
export const schemaDate = rt.union([IsoDateString, schemaNumber]);
|
||||
export const schemaDateArray = rt.array(schemaDate);
|
||||
export const schemaDateRange = rt.partial({
|
||||
gte: schemaDate,
|
||||
lte: schemaDate,
|
||||
});
|
||||
export const schemaDateRangeArray = rt.array(schemaDateRange);
|
||||
export const schemaStringOrNumber = rt.union([schemaString, schemaNumber]);
|
||||
export const schemaStringOrNumberArray = rt.array(schemaStringOrNumber);
|
||||
export const schemaBoolean = rt.boolean;
|
||||
export const schemaBooleanArray = rt.array(schemaBoolean);
|
||||
const schemaGeoPointCoords = rt.type({
|
||||
type: schemaString,
|
||||
coordinates: schemaNumberArray,
|
||||
});
|
||||
const schemaGeoPointString = schemaString;
|
||||
const schemaGeoPointLatLon = rt.type({
|
||||
lat: schemaNumber,
|
||||
lon: schemaNumber,
|
||||
});
|
||||
const schemaGeoPointLocation = rt.type({
|
||||
location: schemaNumberArray,
|
||||
});
|
||||
const schemaGeoPointLocationString = rt.type({
|
||||
location: schemaString,
|
||||
});
|
||||
export const schemaGeoPoint = rt.union([
|
||||
schemaGeoPointCoords,
|
||||
schemaGeoPointString,
|
||||
schemaGeoPointLatLon,
|
||||
schemaGeoPointLocation,
|
||||
schemaGeoPointLocationString,
|
||||
]);
|
||||
export const schemaGeoPointArray = rt.array(schemaGeoPoint);
|
||||
// prettier-ignore
|
||||
const TransformHealthAlertRequired = rt.type({
|
||||
});
|
||||
// prettier-ignore
|
||||
const TransformHealthAlertOptional = rt.partial({
|
||||
'kibana.alert.results': rt.array(
|
||||
rt.partial({
|
||||
description: schemaString,
|
||||
health_status: schemaString,
|
||||
issues: schemaUnknown,
|
||||
node_name: schemaString,
|
||||
transform_id: schemaString,
|
||||
transform_state: schemaString,
|
||||
})
|
||||
),
|
||||
});
|
||||
|
||||
// prettier-ignore
|
||||
export const TransformHealthAlertSchema = rt.intersection([TransformHealthAlertRequired, TransformHealthAlertOptional, AlertSchema]);
|
||||
// prettier-ignore
|
||||
export type TransformHealthAlert = rt.TypeOf<typeof TransformHealthAlertSchema>;
|
|
@ -15,6 +15,8 @@ import type { ObservabilityUptimeAlert } from './generated/observability_uptime_
|
|||
import type { SecurityAlert } from './generated/security_schema';
|
||||
import type { MlAnomalyDetectionAlert } from './generated/ml_anomaly_detection_schema';
|
||||
import type { DefaultAlert } from './generated/default_schema';
|
||||
import type { MlAnomalyDetectionHealthAlert } from './generated/ml_anomaly_detection_health_schema';
|
||||
import type { TransformHealthAlert } from './generated/transform_health_schema';
|
||||
|
||||
export * from './create_schema_from_field_map';
|
||||
|
||||
|
@ -27,7 +29,9 @@ export type { ObservabilityUptimeAlert } from './generated/observability_uptime_
|
|||
export type { SecurityAlert } from './generated/security_schema';
|
||||
export type { StackAlert } from './generated/stack_schema';
|
||||
export type { MlAnomalyDetectionAlert } from './generated/ml_anomaly_detection_schema';
|
||||
export type { MlAnomalyDetectionHealthAlert } from './generated/ml_anomaly_detection_health_schema';
|
||||
export type { DefaultAlert } from './generated/default_schema';
|
||||
export type { TransformHealthAlert } from './generated/transform_health_schema';
|
||||
|
||||
export type AADAlert =
|
||||
| Alert
|
||||
|
@ -38,4 +42,6 @@ export type AADAlert =
|
|||
| ObservabilityUptimeAlert
|
||||
| SecurityAlert
|
||||
| MlAnomalyDetectionAlert
|
||||
| MlAnomalyDetectionHealthAlert
|
||||
| TransformHealthAlert
|
||||
| DefaultAlert;
|
||||
|
|
|
@ -9115,7 +9115,34 @@ Object {
|
|||
|
||||
exports[`Alert as data fields checks detect AAD fields changes for: transform_health 1`] = `
|
||||
Object {
|
||||
"fieldMap": Object {},
|
||||
"fieldMap": Object {
|
||||
"kibana.alert.results": Object {
|
||||
"array": true,
|
||||
"dynamic": false,
|
||||
"properties": Object {
|
||||
"description": Object {
|
||||
"type": "text",
|
||||
},
|
||||
"health_status": Object {
|
||||
"type": "keyword",
|
||||
},
|
||||
"issues": Object {
|
||||
"type": "object",
|
||||
},
|
||||
"node_name": Object {
|
||||
"type": "keyword",
|
||||
},
|
||||
"transform_id": Object {
|
||||
"type": "keyword",
|
||||
},
|
||||
"transform_state": Object {
|
||||
"type": "keyword",
|
||||
},
|
||||
},
|
||||
"required": false,
|
||||
"type": "object",
|
||||
},
|
||||
},
|
||||
}
|
||||
`;
|
||||
|
||||
|
@ -9233,7 +9260,91 @@ Object {
|
|||
|
||||
exports[`Alert as data fields checks detect AAD fields changes for: xpack.ml.anomaly_detection_jobs_health 1`] = `
|
||||
Object {
|
||||
"fieldMap": Object {},
|
||||
"fieldMap": Object {
|
||||
"kibana.alert.datafeed_results": Object {
|
||||
"array": true,
|
||||
"dynamic": false,
|
||||
"properties": Object {
|
||||
"datafeed_id": Object {
|
||||
"type": "keyword",
|
||||
},
|
||||
"datafeed_state": Object {
|
||||
"type": "keyword",
|
||||
},
|
||||
"job_id": Object {
|
||||
"type": "keyword",
|
||||
},
|
||||
"job_state": Object {
|
||||
"type": "keyword",
|
||||
},
|
||||
},
|
||||
"required": false,
|
||||
"type": "object",
|
||||
},
|
||||
"kibana.alert.delayed_data_results": Object {
|
||||
"array": true,
|
||||
"dynamic": false,
|
||||
"properties": Object {
|
||||
"annotation": Object {
|
||||
"type": "text",
|
||||
},
|
||||
"end_timestamp": Object {
|
||||
"type": "date",
|
||||
},
|
||||
"job_id": Object {
|
||||
"type": "keyword",
|
||||
},
|
||||
"missed_docs_count": Object {
|
||||
"type": "long",
|
||||
},
|
||||
},
|
||||
"required": false,
|
||||
"type": "object",
|
||||
},
|
||||
"kibana.alert.job_errors_results": Object {
|
||||
"array": true,
|
||||
"dynamic": false,
|
||||
"properties": Object {
|
||||
"errors": Object {
|
||||
"type": "object",
|
||||
},
|
||||
"job_id": Object {
|
||||
"type": "keyword",
|
||||
},
|
||||
},
|
||||
"required": false,
|
||||
"type": "object",
|
||||
},
|
||||
"kibana.alert.mml_results": Object {
|
||||
"array": true,
|
||||
"dynamic": false,
|
||||
"properties": Object {
|
||||
"job_id": Object {
|
||||
"type": "keyword",
|
||||
},
|
||||
"log_time": Object {
|
||||
"type": "date",
|
||||
},
|
||||
"memory_status": Object {
|
||||
"type": "keyword",
|
||||
},
|
||||
"model_bytes": Object {
|
||||
"type": "long",
|
||||
},
|
||||
"model_bytes_exceeded": Object {
|
||||
"type": "long",
|
||||
},
|
||||
"model_bytes_memory_limit": Object {
|
||||
"type": "long",
|
||||
},
|
||||
"peak_model_bytes": Object {
|
||||
"type": "long",
|
||||
},
|
||||
},
|
||||
"required": false,
|
||||
"type": "object",
|
||||
},
|
||||
},
|
||||
}
|
||||
`;
|
||||
|
||||
|
|
|
@ -85,6 +85,8 @@ export const HEALTH_CHECK_NAMES: Record<JobsHealthTests, { name: string; descrip
|
|||
};
|
||||
|
||||
const ML_ALERT_NAMESPACE = ALERT_NAMESPACE;
|
||||
|
||||
// Anomaly detection rule type fields
|
||||
export const ALERT_ANOMALY_TIMESTAMP = `${ML_ALERT_NAMESPACE}.anomaly_timestamp` as const;
|
||||
export const ALERT_ANOMALY_DETECTION_JOB_ID = `${ML_ALERT_NAMESPACE}.job_id` as const;
|
||||
export const ALERT_ANOMALY_SCORE = `${ML_ALERT_NAMESPACE}.anomaly_score` as const;
|
||||
|
@ -92,6 +94,12 @@ export const ALERT_ANOMALY_IS_INTERIM = `${ML_ALERT_NAMESPACE}.is_interim` as co
|
|||
export const ALERT_TOP_RECORDS = `${ML_ALERT_NAMESPACE}.top_records` as const;
|
||||
export const ALERT_TOP_INFLUENCERS = `${ML_ALERT_NAMESPACE}.top_influencers` as const;
|
||||
|
||||
// Anomaly detection health rule type fields
|
||||
export const ALERT_MML_RESULTS = `${ML_ALERT_NAMESPACE}.mml_results` as const;
|
||||
export const ALERT_DATAFEED_RESULTS = `${ML_ALERT_NAMESPACE}.datafeed_results` as const;
|
||||
export const ALERT_DELAYED_DATA_RESULTS = `${ML_ALERT_NAMESPACE}.delayed_data_results` as const;
|
||||
export const ALERT_JOB_ERRORS_RESULTS = `${ML_ALERT_NAMESPACE}.job_errors_results` as const;
|
||||
|
||||
export const alertFieldNameMap = Object.freeze<Record<string, string>>({
|
||||
[ALERT_RULE_NAME]: i18n.translate('xpack.ml.alertsTable.columns.ruleName', {
|
||||
defaultMessage: 'Rule name',
|
||||
|
|
|
@ -254,6 +254,10 @@ describe('JobsHealthService', () => {
|
|||
message: 'No errors in the jobs messages.',
|
||||
results: [],
|
||||
},
|
||||
payload: {
|
||||
'kibana.alert.job_errors_results': [],
|
||||
'kibana.alert.reason': 'No errors in the jobs messages.',
|
||||
},
|
||||
isHealthy: true,
|
||||
name: 'Errors in job messages',
|
||||
},
|
||||
|
@ -310,6 +314,18 @@ describe('JobsHealthService', () => {
|
|||
],
|
||||
message: 'Job test_job_01 is suffering from delayed data.',
|
||||
},
|
||||
payload: {
|
||||
'kibana.alert.delayed_data_results': [
|
||||
{
|
||||
annotation:
|
||||
'Datafeed has missed 11 documents due to ingest latency, latest bucket with missing data is [2021-07-30T13:50:00.000Z]. Consider increasing query_delay',
|
||||
end_timestamp: 1627653300000,
|
||||
job_id: 'test_job_01',
|
||||
missed_docs_count: 11,
|
||||
},
|
||||
],
|
||||
'kibana.alert.reason': 'Job test_job_01 is suffering from delayed data.',
|
||||
},
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
@ -365,6 +381,17 @@ describe('JobsHealthService', () => {
|
|||
],
|
||||
message: 'Datafeed is not started for job test_job_02',
|
||||
},
|
||||
payload: {
|
||||
'kibana.alert.datafeed_results': [
|
||||
{
|
||||
datafeed_id: 'test_datafeed_02',
|
||||
datafeed_state: 'stopped',
|
||||
job_id: 'test_job_02',
|
||||
job_state: 'opened',
|
||||
},
|
||||
],
|
||||
'kibana.alert.reason': 'Datafeed is not started for job test_job_02',
|
||||
},
|
||||
},
|
||||
{
|
||||
isHealthy: false,
|
||||
|
@ -384,6 +411,21 @@ describe('JobsHealthService', () => {
|
|||
message:
|
||||
'Job test_job_01 reached the hard model memory limit. Assign more memory to the job and restore it from a snapshot taken prior to reaching the hard limit.',
|
||||
},
|
||||
payload: {
|
||||
'kibana.alert.mml_results': [
|
||||
{
|
||||
job_id: 'test_job_01',
|
||||
log_time: 1626935914540,
|
||||
memory_status: 'hard_limit',
|
||||
model_bytes: 1000000,
|
||||
model_bytes_exceeded: 200000,
|
||||
model_bytes_memory_limit: 800000,
|
||||
peak_model_bytes: 1000000,
|
||||
},
|
||||
],
|
||||
'kibana.alert.reason':
|
||||
'Job test_job_01 reached the hard model memory limit. Assign more memory to the job and restore it from a snapshot taken prior to reaching the hard limit.',
|
||||
},
|
||||
},
|
||||
{
|
||||
isHealthy: false,
|
||||
|
@ -407,6 +449,25 @@ describe('JobsHealthService', () => {
|
|||
],
|
||||
message: 'Jobs test_job_01, test_job_02 are suffering from delayed data.',
|
||||
},
|
||||
payload: {
|
||||
'kibana.alert.delayed_data_results': [
|
||||
{
|
||||
annotation:
|
||||
'Datafeed has missed 11 documents due to ingest latency, latest bucket with missing data is [2021-07-30T13:50:00.000Z]. Consider increasing query_delay',
|
||||
end_timestamp: 1627653300000,
|
||||
job_id: 'test_job_01',
|
||||
missed_docs_count: 11,
|
||||
},
|
||||
{
|
||||
annotation:
|
||||
'Datafeed has missed 8 documents due to ingest latency, latest bucket with missing data is [2021-07-30T13:50:00.000Z]. Consider increasing query_delay',
|
||||
end_timestamp: 1627653300000,
|
||||
job_id: 'test_job_02',
|
||||
missed_docs_count: 8,
|
||||
},
|
||||
],
|
||||
'kibana.alert.reason': 'Jobs test_job_01, test_job_02 are suffering from delayed data.',
|
||||
},
|
||||
},
|
||||
{
|
||||
isHealthy: true,
|
||||
|
@ -415,6 +476,10 @@ describe('JobsHealthService', () => {
|
|||
message: 'No errors in the jobs messages.',
|
||||
results: [],
|
||||
},
|
||||
payload: {
|
||||
'kibana.alert.job_errors_results': [],
|
||||
'kibana.alert.reason': 'No errors in the jobs messages.',
|
||||
},
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
|
|
@ -10,18 +10,27 @@ import type { KibanaRequest, Logger, SavedObjectsClientContract } from '@kbn/cor
|
|||
import { i18n } from '@kbn/i18n';
|
||||
import type { MlJob } from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
|
||||
import { isDefined } from '@kbn/ml-is-defined';
|
||||
import { ALERT_REASON } from '@kbn/rule-data-utils';
|
||||
import type { MlClient } from '../ml_client';
|
||||
import type { JobSelection } from '../../routes/schemas/alerting_schema';
|
||||
import { datafeedsProvider, type DatafeedsService } from '../../models/job_service/datafeeds';
|
||||
import { ALL_JOBS_SELECTION, HEALTH_CHECK_NAMES } from '../../../common/constants/alerts';
|
||||
import {
|
||||
ALERT_DATAFEED_RESULTS,
|
||||
ALERT_DELAYED_DATA_RESULTS,
|
||||
ALERT_JOB_ERRORS_RESULTS,
|
||||
ALERT_MML_RESULTS,
|
||||
ALL_JOBS_SELECTION,
|
||||
HEALTH_CHECK_NAMES,
|
||||
} from '../../../common/constants/alerts';
|
||||
import type { DatafeedStats } from '../../../common/types/anomaly_detection_jobs';
|
||||
import type { GetGuards } from '../../shared_services/shared_services';
|
||||
import type {
|
||||
AnomalyDetectionJobHealthAlertPayload,
|
||||
AnomalyDetectionJobsHealthAlertContext,
|
||||
DelayedDataResponse,
|
||||
DelayedDataPayloadResponse,
|
||||
JobsErrorsResponse,
|
||||
JobsHealthExecutorOptions,
|
||||
MmlTestResponse,
|
||||
MmlTestPayloadResponse,
|
||||
NotStartedDatafeedResponse,
|
||||
} from './register_jobs_monitoring_rule_type';
|
||||
import {
|
||||
|
@ -40,8 +49,9 @@ import type { FieldFormatsRegistryProvider } from '../../../common/types/kibana'
|
|||
export interface TestResult {
|
||||
name: string;
|
||||
context: AnomalyDetectionJobsHealthAlertContext;
|
||||
payload: AnomalyDetectionJobHealthAlertPayload;
|
||||
/**
|
||||
* Indicates if the health check is successful.
|
||||
* Indicates if the health check is successful.
|
||||
*/
|
||||
isHealthy: boolean;
|
||||
}
|
||||
|
@ -61,11 +71,34 @@ export function jobsHealthServiceProvider(
|
|||
*/
|
||||
const getFormatters = memoize(async () => {
|
||||
const fieldFormatsRegistry = await getFieldsFormatRegistry();
|
||||
const dateFormatter = fieldFormatsRegistry.deserialize({ id: 'date' });
|
||||
const bytesFormatter = fieldFormatsRegistry.deserialize({ id: 'bytes' });
|
||||
const dateFormat = fieldFormatsRegistry.deserialize({ id: 'date' });
|
||||
const bytesFormat = fieldFormatsRegistry.deserialize({ id: 'bytes' });
|
||||
|
||||
const dateFormatter = dateFormat.convert.bind(dateFormat);
|
||||
const bytesFormatter = bytesFormat.convert.bind(bytesFormat);
|
||||
|
||||
return {
|
||||
dateFormatter: dateFormatter.convert.bind(dateFormatter),
|
||||
bytesFormatter: bytesFormatter.convert.bind(bytesFormatter),
|
||||
dateFormatter,
|
||||
bytesFormatter,
|
||||
mmlResultsFormatter: (payload: MmlTestPayloadResponse) => {
|
||||
return {
|
||||
job_id: payload.job_id,
|
||||
memory_status: payload.memory_status,
|
||||
log_time: dateFormatter(payload.log_time),
|
||||
model_bytes: bytesFormatter(payload.model_bytes),
|
||||
model_bytes_memory_limit: bytesFormatter(payload.model_bytes_memory_limit),
|
||||
peak_model_bytes: bytesFormatter(payload.peak_model_bytes),
|
||||
model_bytes_exceeded: bytesFormatter(payload.model_bytes_exceeded),
|
||||
};
|
||||
},
|
||||
delayedDataFormatter: (payload: DelayedDataPayloadResponse) => {
|
||||
return {
|
||||
job_id: payload.job_id,
|
||||
annotation: payload.annotation,
|
||||
missed_docs_count: payload.missed_docs_count,
|
||||
end_timestamp: dateFormatter(payload.end_timestamp),
|
||||
};
|
||||
},
|
||||
};
|
||||
});
|
||||
|
||||
|
@ -189,22 +222,20 @@ export function jobsHealthServiceProvider(
|
|||
* Gets the model memory report for opened jobs.
|
||||
* @param jobIds
|
||||
*/
|
||||
async getMmlReport(jobIds: string[]): Promise<MmlTestResponse[]> {
|
||||
async getMmlReport(jobIds: string[]): Promise<MmlTestPayloadResponse[]> {
|
||||
const jobsStats = await getJobStats(jobIds);
|
||||
|
||||
const { dateFormatter, bytesFormatter } = await getFormatters();
|
||||
|
||||
return jobsStats
|
||||
.filter((j) => j.state === 'opened')
|
||||
.map(({ job_id: jobId, model_size_stats: modelSizeStats }) => {
|
||||
return {
|
||||
job_id: jobId,
|
||||
memory_status: modelSizeStats.memory_status,
|
||||
log_time: dateFormatter(modelSizeStats.log_time),
|
||||
model_bytes: bytesFormatter(modelSizeStats.model_bytes),
|
||||
model_bytes_memory_limit: bytesFormatter(modelSizeStats.model_bytes_memory_limit),
|
||||
peak_model_bytes: bytesFormatter(modelSizeStats.peak_model_bytes),
|
||||
model_bytes_exceeded: bytesFormatter(modelSizeStats.model_bytes_exceeded),
|
||||
log_time: modelSizeStats.log_time,
|
||||
model_bytes: modelSizeStats.model_bytes,
|
||||
model_bytes_memory_limit: modelSizeStats.model_bytes_memory_limit!,
|
||||
peak_model_bytes: modelSizeStats.peak_model_bytes!,
|
||||
model_bytes_exceeded: modelSizeStats.model_bytes_exceeded!,
|
||||
};
|
||||
});
|
||||
},
|
||||
|
@ -221,7 +252,7 @@ export function jobsHealthServiceProvider(
|
|||
jobs: MlJob[],
|
||||
timeInterval: string | null,
|
||||
docsCount: number | null
|
||||
): Promise<[DelayedDataResponse[], DelayedDataResponse[]]> {
|
||||
): Promise<[DelayedDataPayloadResponse[], DelayedDataPayloadResponse[]]> {
|
||||
const jobIds = getJobIds(jobs);
|
||||
const datafeeds = await getDatafeeds(jobIds);
|
||||
|
||||
|
@ -235,8 +266,6 @@ export function jobsHealthServiceProvider(
|
|||
const defaultLookbackInterval = resolveLookbackInterval(resultJobs, datafeeds!);
|
||||
const earliestMs = getDelayedDataLookbackTimestamp(timeInterval, defaultLookbackInterval);
|
||||
|
||||
const { dateFormatter } = await getFormatters();
|
||||
|
||||
const annotationsData = (
|
||||
await annotationService.getDelayedDataAnnotations({
|
||||
jobIds: resultJobIds,
|
||||
|
@ -268,12 +297,6 @@ export function jobsHealthServiceProvider(
|
|||
v.end_timestamp > getDelayedDataLookbackTimestamp(timeInterval, jobLookbackInterval);
|
||||
|
||||
return isEndTimestampWithinRange;
|
||||
})
|
||||
.map((v) => {
|
||||
return {
|
||||
...v,
|
||||
end_timestamp: dateFormatter(v.end_timestamp),
|
||||
};
|
||||
});
|
||||
|
||||
return partition(annotationsData, (v) => {
|
||||
|
@ -343,28 +366,28 @@ export function jobsHealthServiceProvider(
|
|||
const datafeedResults = isHealthy ? startedDatafeeds : notStartedDatafeeds;
|
||||
const { count, jobsString } = getJobsAlertingMessageValues(datafeedResults);
|
||||
|
||||
const message = isHealthy
|
||||
? i18n.translate('xpack.ml.alertTypes.jobsHealthAlertingRule.datafeedRecoveryMessage', {
|
||||
defaultMessage:
|
||||
'Datafeed is started for {count, plural, one {job} other {jobs}} {jobsString}',
|
||||
values: { count, jobsString },
|
||||
})
|
||||
: i18n.translate('xpack.ml.alertTypes.jobsHealthAlertingRule.datafeedStateMessage', {
|
||||
defaultMessage:
|
||||
'Datafeed is not started for {count, plural, one {job} other {jobs}} {jobsString}',
|
||||
values: { count, jobsString },
|
||||
});
|
||||
|
||||
results.push({
|
||||
isHealthy,
|
||||
name: HEALTH_CHECK_NAMES.datafeed.name,
|
||||
payload: {
|
||||
[ALERT_REASON]: message,
|
||||
[ALERT_DATAFEED_RESULTS]: datafeedResults,
|
||||
},
|
||||
context: {
|
||||
results: datafeedResults,
|
||||
message: isHealthy
|
||||
? i18n.translate(
|
||||
'xpack.ml.alertTypes.jobsHealthAlertingRule.datafeedRecoveryMessage',
|
||||
{
|
||||
defaultMessage:
|
||||
'Datafeed is started for {count, plural, one {job} other {jobs}} {jobsString}',
|
||||
values: { count, jobsString },
|
||||
}
|
||||
)
|
||||
: i18n.translate(
|
||||
'xpack.ml.alertTypes.jobsHealthAlertingRule.datafeedStateMessage',
|
||||
{
|
||||
defaultMessage:
|
||||
'Datafeed is not started for {count, plural, one {job} other {jobs}} {jobsString}',
|
||||
values: { count, jobsString },
|
||||
}
|
||||
),
|
||||
message,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
@ -424,13 +447,23 @@ export function jobsHealthServiceProvider(
|
|||
}
|
||||
}
|
||||
|
||||
const mmlResults = isHealthy
|
||||
? okJobs
|
||||
: [...(hardLimitJobs ?? []), ...(softLimitJobs ?? [])];
|
||||
|
||||
const { mmlResultsFormatter } = await getFormatters();
|
||||
|
||||
results.push({
|
||||
isHealthy,
|
||||
name: HEALTH_CHECK_NAMES.mml.name,
|
||||
context: {
|
||||
results: isHealthy ? okJobs : [...(hardLimitJobs ?? []), ...(softLimitJobs ?? [])],
|
||||
results: mmlResults.map(mmlResultsFormatter),
|
||||
message,
|
||||
},
|
||||
payload: {
|
||||
[ALERT_REASON]: message,
|
||||
[ALERT_MML_RESULTS]: mmlResults,
|
||||
},
|
||||
});
|
||||
}
|
||||
}
|
||||
|
@ -446,23 +479,33 @@ export function jobsHealthServiceProvider(
|
|||
const isHealthy = exceededThresholdAnnotations.length === 0;
|
||||
const { count, jobsString } = getJobsAlertingMessageValues(exceededThresholdAnnotations);
|
||||
|
||||
const message = isHealthy
|
||||
? i18n.translate(
|
||||
'xpack.ml.alertTypes.jobsHealthAlertingRule.delayedDataRecoveryMessage',
|
||||
{
|
||||
defaultMessage: 'No data delay has occurred.',
|
||||
}
|
||||
)
|
||||
: i18n.translate('xpack.ml.alertTypes.jobsHealthAlertingRule.delayedDataMessage', {
|
||||
defaultMessage:
|
||||
'{count, plural, one {Job} other {Jobs}} {jobsString} {count, plural, one {is} other {are}} suffering from delayed data.',
|
||||
values: { count, jobsString },
|
||||
});
|
||||
|
||||
const delayedDataResults = isHealthy
|
||||
? withinThresholdAnnotations
|
||||
: exceededThresholdAnnotations;
|
||||
|
||||
results.push({
|
||||
isHealthy,
|
||||
name: HEALTH_CHECK_NAMES.delayedData.name,
|
||||
context: {
|
||||
results: isHealthy ? withinThresholdAnnotations : exceededThresholdAnnotations,
|
||||
message: isHealthy
|
||||
? i18n.translate(
|
||||
'xpack.ml.alertTypes.jobsHealthAlertingRule.delayedDataRecoveryMessage',
|
||||
{
|
||||
defaultMessage: 'No data delay has occurred.',
|
||||
}
|
||||
)
|
||||
: i18n.translate('xpack.ml.alertTypes.jobsHealthAlertingRule.delayedDataMessage', {
|
||||
defaultMessage:
|
||||
'{count, plural, one {Job} other {Jobs}} {jobsString} {count, plural, one {is} other {are}} suffering from delayed data.',
|
||||
values: { count, jobsString },
|
||||
}),
|
||||
results: delayedDataResults.map((await getFormatters()).delayedDataFormatter),
|
||||
message,
|
||||
},
|
||||
payload: {
|
||||
[ALERT_REASON]: message,
|
||||
[ALERT_DELAYED_DATA_RESULTS]: delayedDataResults,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
@ -472,25 +515,31 @@ export function jobsHealthServiceProvider(
|
|||
const { count, jobsString } = getJobsAlertingMessageValues(response);
|
||||
const isHealthy = response.length === 0;
|
||||
|
||||
const message = isHealthy
|
||||
? i18n.translate(
|
||||
'xpack.ml.alertTypes.jobsHealthAlertingRule.errorMessagesRecoveredMessage',
|
||||
{
|
||||
defaultMessage:
|
||||
'No errors in the {count, plural, one {job} other {jobs}} messages.',
|
||||
values: { count },
|
||||
}
|
||||
)
|
||||
: i18n.translate('xpack.ml.alertTypes.jobsHealthAlertingRule.errorMessagesMessage', {
|
||||
defaultMessage:
|
||||
'{count, plural, one {Job} other {Jobs}} {jobsString} {count, plural, one {contains} other {contain}} errors in the messages.',
|
||||
values: { count, jobsString },
|
||||
});
|
||||
|
||||
results.push({
|
||||
isHealthy,
|
||||
name: HEALTH_CHECK_NAMES.errorMessages.name,
|
||||
context: {
|
||||
results: response,
|
||||
message: isHealthy
|
||||
? i18n.translate(
|
||||
'xpack.ml.alertTypes.jobsHealthAlertingRule.errorMessagesRecoveredMessage',
|
||||
{
|
||||
defaultMessage:
|
||||
'No errors in the {count, plural, one {job} other {jobs}} messages.',
|
||||
values: { count },
|
||||
}
|
||||
)
|
||||
: i18n.translate('xpack.ml.alertTypes.jobsHealthAlertingRule.errorMessagesMessage', {
|
||||
defaultMessage:
|
||||
'{count, plural, one {Job} other {Jobs}} {jobsString} {count, plural, one {contains} other {contain}} errors in the messages.',
|
||||
values: { count, jobsString },
|
||||
}),
|
||||
message,
|
||||
},
|
||||
payload: {
|
||||
[ALERT_REASON]: message,
|
||||
[ALERT_JOB_ERRORS_RESULTS]: response,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
|
|
@ -6,8 +6,10 @@
|
|||
*/
|
||||
|
||||
import { i18n } from '@kbn/i18n';
|
||||
import { KibanaRequest, DEFAULT_APP_CATEGORIES } from '@kbn/core/server';
|
||||
import { DEFAULT_APP_CATEGORIES, KibanaRequest } from '@kbn/core/server';
|
||||
import type {
|
||||
ByteSize,
|
||||
DateTime,
|
||||
MlDatafeedState,
|
||||
MlJobState,
|
||||
MlJobStats,
|
||||
|
@ -19,11 +21,18 @@ import type {
|
|||
RecoveredActionGroupId,
|
||||
RuleTypeState,
|
||||
} from '@kbn/alerting-plugin/common';
|
||||
import { AlertsClientError, DEFAULT_AAD_CONFIG } from '@kbn/alerting-plugin/server';
|
||||
import type { RuleExecutorOptions } from '@kbn/alerting-plugin/server';
|
||||
import type { DefaultAlert } from '@kbn/alerts-as-data-utils';
|
||||
import { AlertsClientError, IRuleTypeAlerts } from '@kbn/alerting-plugin/server';
|
||||
import { MlAnomalyDetectionHealthAlert } from '@kbn/alerts-as-data-utils';
|
||||
import { ALERT_REASON } from '@kbn/rule-data-utils';
|
||||
import { ML_ALERT_TYPES } from '../../../common/constants/alerts';
|
||||
import { ES_FIELD_TYPES } from '@kbn/field-types';
|
||||
import {
|
||||
ALERT_DATAFEED_RESULTS,
|
||||
ALERT_DELAYED_DATA_RESULTS,
|
||||
ALERT_JOB_ERRORS_RESULTS,
|
||||
ALERT_MML_RESULTS,
|
||||
ML_ALERT_TYPES,
|
||||
} from '../../../common/constants/alerts';
|
||||
import { PLUGIN_ID } from '../../../common/constants/app';
|
||||
import { MINIMUM_FULL_LICENSE } from '../../../common/license';
|
||||
import {
|
||||
|
@ -38,13 +47,23 @@ type ModelSizeStats = MlJobStats['model_size_stats'];
|
|||
export interface MmlTestResponse {
|
||||
job_id: string;
|
||||
memory_status: ModelSizeStats['memory_status'];
|
||||
log_time: ModelSizeStats['log_time'];
|
||||
log_time: string;
|
||||
model_bytes: string;
|
||||
model_bytes_memory_limit: string;
|
||||
peak_model_bytes: string;
|
||||
model_bytes_exceeded: string;
|
||||
}
|
||||
|
||||
export interface MmlTestPayloadResponse {
|
||||
job_id: string;
|
||||
memory_status: ModelSizeStats['memory_status'];
|
||||
log_time: ModelSizeStats['log_time'];
|
||||
model_bytes: ByteSize;
|
||||
model_bytes_memory_limit: ByteSize;
|
||||
peak_model_bytes: ByteSize;
|
||||
model_bytes_exceeded: ByteSize;
|
||||
}
|
||||
|
||||
export interface NotStartedDatafeedResponse {
|
||||
datafeed_id: string;
|
||||
datafeed_state: MlDatafeedState;
|
||||
|
@ -62,6 +81,15 @@ export interface DelayedDataResponse {
|
|||
end_timestamp: string;
|
||||
}
|
||||
|
||||
export interface DelayedDataPayloadResponse {
|
||||
job_id: string;
|
||||
/** Annotation string */
|
||||
annotation: string;
|
||||
/** Number of missed documents */
|
||||
missed_docs_count: number;
|
||||
end_timestamp: DateTime;
|
||||
}
|
||||
|
||||
export interface JobsErrorsResponse {
|
||||
job_id: string;
|
||||
errors: Array<Omit<JobMessage, 'timestamp'> & { timestamp: string }>;
|
||||
|
@ -78,6 +106,15 @@ export type AnomalyDetectionJobsHealthAlertContext = {
|
|||
message: string;
|
||||
} & AlertInstanceContext;
|
||||
|
||||
export type AnomalyDetectionJobHealthAlertPayload = {
|
||||
[ALERT_REASON]: string;
|
||||
} & (
|
||||
| { [ALERT_MML_RESULTS]: MmlTestPayloadResponse[] }
|
||||
| { [ALERT_DATAFEED_RESULTS]: NotStartedDatafeedResponse[] }
|
||||
| { [ALERT_DELAYED_DATA_RESULTS]: DelayedDataPayloadResponse[] }
|
||||
| { [ALERT_JOB_ERRORS_RESULTS]: JobsErrorsResponse[] }
|
||||
);
|
||||
|
||||
export const ANOMALY_DETECTION_JOB_REALTIME_ISSUE = 'anomaly_detection_realtime_issue';
|
||||
|
||||
export type AnomalyDetectionJobRealtimeIssue = typeof ANOMALY_DETECTION_JOB_REALTIME_ISSUE;
|
||||
|
@ -95,9 +132,69 @@ export type JobsHealthExecutorOptions = RuleExecutorOptions<
|
|||
Record<string, unknown>,
|
||||
AnomalyDetectionJobsHealthAlertContext,
|
||||
AnomalyDetectionJobRealtimeIssue,
|
||||
DefaultAlert
|
||||
MlAnomalyDetectionHealthAlert
|
||||
>;
|
||||
|
||||
export const ANOMALY_DETECTION_HEALTH_AAD_INDEX_NAME = 'ml.anomaly-detection-health';
|
||||
|
||||
export const ANOMALY_DETECTION_HEALTH_AAD_CONFIG: IRuleTypeAlerts<MlAnomalyDetectionHealthAlert> = {
|
||||
context: ANOMALY_DETECTION_HEALTH_AAD_INDEX_NAME,
|
||||
mappings: {
|
||||
fieldMap: {
|
||||
[ALERT_MML_RESULTS]: {
|
||||
type: ES_FIELD_TYPES.OBJECT,
|
||||
array: true,
|
||||
required: false,
|
||||
dynamic: false,
|
||||
properties: {
|
||||
job_id: { type: ES_FIELD_TYPES.KEYWORD },
|
||||
memory_status: { type: ES_FIELD_TYPES.KEYWORD },
|
||||
log_time: { type: ES_FIELD_TYPES.DATE },
|
||||
model_bytes: { type: ES_FIELD_TYPES.LONG },
|
||||
model_bytes_memory_limit: { type: ES_FIELD_TYPES.LONG },
|
||||
peak_model_bytes: { type: ES_FIELD_TYPES.LONG },
|
||||
model_bytes_exceeded: { type: ES_FIELD_TYPES.LONG },
|
||||
},
|
||||
},
|
||||
[ALERT_DATAFEED_RESULTS]: {
|
||||
type: ES_FIELD_TYPES.OBJECT,
|
||||
array: true,
|
||||
required: false,
|
||||
dynamic: false,
|
||||
properties: {
|
||||
job_id: { type: ES_FIELD_TYPES.KEYWORD },
|
||||
job_state: { type: ES_FIELD_TYPES.KEYWORD },
|
||||
datafeed_id: { type: ES_FIELD_TYPES.KEYWORD },
|
||||
datafeed_state: { type: ES_FIELD_TYPES.KEYWORD },
|
||||
},
|
||||
},
|
||||
[ALERT_DELAYED_DATA_RESULTS]: {
|
||||
type: ES_FIELD_TYPES.OBJECT,
|
||||
array: true,
|
||||
required: false,
|
||||
dynamic: false,
|
||||
properties: {
|
||||
job_id: { type: ES_FIELD_TYPES.KEYWORD },
|
||||
annotation: { type: ES_FIELD_TYPES.TEXT },
|
||||
missed_docs_count: { type: ES_FIELD_TYPES.LONG },
|
||||
end_timestamp: { type: ES_FIELD_TYPES.DATE },
|
||||
},
|
||||
},
|
||||
[ALERT_JOB_ERRORS_RESULTS]: {
|
||||
type: ES_FIELD_TYPES.OBJECT,
|
||||
array: true,
|
||||
required: false,
|
||||
dynamic: false,
|
||||
properties: {
|
||||
job_id: { type: ES_FIELD_TYPES.KEYWORD },
|
||||
errors: { type: ES_FIELD_TYPES.OBJECT },
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
shouldWrite: true,
|
||||
};
|
||||
|
||||
export function registerJobsMonitoringRuleType({
|
||||
alerting,
|
||||
mlServicesProviders,
|
||||
|
@ -111,7 +208,7 @@ export function registerJobsMonitoringRuleType({
|
|||
AnomalyDetectionJobsHealthAlertContext,
|
||||
AnomalyDetectionJobRealtimeIssue,
|
||||
RecoveredActionGroupId,
|
||||
DefaultAlert
|
||||
MlAnomalyDetectionHealthAlert
|
||||
>({
|
||||
id: ML_ALERT_TYPES.AD_JOBS_HEALTH,
|
||||
name: i18n.translate('xpack.ml.jobsHealthAlertingRule.name', {
|
||||
|
@ -155,7 +252,7 @@ export function registerJobsMonitoringRuleType({
|
|||
minimumLicenseRequired: MINIMUM_FULL_LICENSE,
|
||||
isExportable: true,
|
||||
doesSetRecoveryContext: true,
|
||||
alerts: DEFAULT_AAD_CONFIG,
|
||||
alerts: ANOMALY_DETECTION_HEALTH_AAD_CONFIG,
|
||||
async executor(options) {
|
||||
const {
|
||||
services,
|
||||
|
@ -185,14 +282,12 @@ export function registerJobsMonitoringRuleType({
|
|||
.join(', ')}`
|
||||
);
|
||||
|
||||
unhealthyTests.forEach(({ name: alertName, context }) => {
|
||||
unhealthyTests.forEach(({ name: alertName, context, payload }) => {
|
||||
alertsClient.report({
|
||||
id: alertName,
|
||||
actionGroup: ANOMALY_DETECTION_JOB_REALTIME_ISSUE,
|
||||
context,
|
||||
payload: {
|
||||
[ALERT_REASON]: context.message,
|
||||
},
|
||||
payload,
|
||||
});
|
||||
});
|
||||
}
|
||||
|
@ -205,9 +300,7 @@ export function registerJobsMonitoringRuleType({
|
|||
alertsClient.setAlertData({
|
||||
id: recoveredAlertId,
|
||||
context: testResult.context,
|
||||
payload: {
|
||||
[ALERT_REASON]: testResult.context.message,
|
||||
},
|
||||
payload: testResult.payload,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
import { i18n } from '@kbn/i18n';
|
||||
|
||||
import { LicenseType } from '@kbn/licensing-plugin/common/types';
|
||||
import { ALERT_NAMESPACE } from '@kbn/rule-data-utils';
|
||||
import { TransformHealthTests } from './types/alerting';
|
||||
|
||||
export const DEFAULT_REFRESH_INTERVAL_MS = 30000;
|
||||
|
@ -163,10 +164,15 @@ export const TRANSFORM_FUNCTION = {
|
|||
|
||||
export type TransformFunction = typeof TRANSFORM_FUNCTION[keyof typeof TRANSFORM_FUNCTION];
|
||||
|
||||
// Alerting
|
||||
|
||||
export const TRANSFORM_RULE_TYPE = {
|
||||
TRANSFORM_HEALTH: 'transform_health',
|
||||
} as const;
|
||||
|
||||
const TRANSFORM_ALERT_NAMESPACE = ALERT_NAMESPACE;
|
||||
export const TRANSFORM_HEALTH_RESULTS = `${TRANSFORM_ALERT_NAMESPACE}.results` as const;
|
||||
|
||||
export const ALL_TRANSFORMS_SELECTION = '*';
|
||||
|
||||
export const TRANSFORM_HEALTH_CHECK_NAMES: Record<
|
||||
|
|
|
@ -14,12 +14,18 @@ import type {
|
|||
RecoveredActionGroupId,
|
||||
RuleTypeState,
|
||||
} from '@kbn/alerting-plugin/common';
|
||||
import { AlertsClientError, DEFAULT_AAD_CONFIG, RuleType } from '@kbn/alerting-plugin/server';
|
||||
import { AlertsClientError, IRuleTypeAlerts, RuleType } from '@kbn/alerting-plugin/server';
|
||||
import type { PluginSetupContract as AlertingSetup } from '@kbn/alerting-plugin/server';
|
||||
import type { FieldFormatsStart } from '@kbn/field-formats-plugin/server';
|
||||
import type { DefaultAlert } from '@kbn/alerts-as-data-utils';
|
||||
import type { TransformHealthAlert } from '@kbn/alerts-as-data-utils';
|
||||
import { ALERT_REASON } from '@kbn/rule-data-utils';
|
||||
import { PLUGIN, type TransformHealth, TRANSFORM_RULE_TYPE } from '../../../../common/constants';
|
||||
import { ES_FIELD_TYPES } from '@kbn/field-types';
|
||||
import {
|
||||
PLUGIN,
|
||||
type TransformHealth,
|
||||
TRANSFORM_RULE_TYPE,
|
||||
TRANSFORM_HEALTH_RESULTS,
|
||||
} from '../../../../common/constants';
|
||||
import { transformHealthRuleParams, TransformHealthRuleParams } from './schema';
|
||||
import { transformHealthServiceProvider } from './transform_health_service';
|
||||
|
||||
|
@ -35,6 +41,9 @@ export interface TransformStateReportResponse extends BaseTransformAlertResponse
|
|||
node_name?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated This health check is no longer in use
|
||||
*/
|
||||
export interface ErrorMessagesTransformResponse extends BaseTransformAlertResponse {
|
||||
error_messages: Array<{ message: string; timestamp: number; node_name?: string }>;
|
||||
}
|
||||
|
@ -63,6 +72,31 @@ interface RegisterParams {
|
|||
getFieldFormatsStart: () => FieldFormatsStart;
|
||||
}
|
||||
|
||||
export const TRANSFORM_HEALTH_AAD_INDEX_NAME = 'transform.health';
|
||||
|
||||
export const TRANSFORM_HEALTH_AAD_CONFIG: IRuleTypeAlerts<TransformHealthAlert> = {
|
||||
context: TRANSFORM_HEALTH_AAD_INDEX_NAME,
|
||||
mappings: {
|
||||
fieldMap: {
|
||||
[TRANSFORM_HEALTH_RESULTS]: {
|
||||
type: ES_FIELD_TYPES.OBJECT,
|
||||
array: true,
|
||||
required: false,
|
||||
dynamic: false,
|
||||
properties: {
|
||||
transform_id: { type: ES_FIELD_TYPES.KEYWORD },
|
||||
description: { type: ES_FIELD_TYPES.TEXT },
|
||||
health_status: { type: ES_FIELD_TYPES.KEYWORD },
|
||||
issues: { type: ES_FIELD_TYPES.OBJECT },
|
||||
transform_state: { type: ES_FIELD_TYPES.KEYWORD },
|
||||
node_name: { type: ES_FIELD_TYPES.KEYWORD },
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
shouldWrite: true,
|
||||
};
|
||||
|
||||
export function registerTransformHealthRuleType(params: RegisterParams) {
|
||||
const { alerting } = params;
|
||||
alerting.registerType(getTransformHealthRuleType(params.getFieldFormatsStart));
|
||||
|
@ -78,7 +112,7 @@ export function getTransformHealthRuleType(
|
|||
TransformHealthAlertContext,
|
||||
TransformIssue,
|
||||
RecoveredActionGroupId,
|
||||
DefaultAlert
|
||||
TransformHealthAlert
|
||||
> {
|
||||
return {
|
||||
id: TRANSFORM_RULE_TYPE.TRANSFORM_HEALTH,
|
||||
|
@ -121,7 +155,7 @@ export function getTransformHealthRuleType(
|
|||
minimumLicenseRequired: PLUGIN.MINIMUM_LICENSE_REQUIRED,
|
||||
isExportable: true,
|
||||
doesSetRecoveryContext: true,
|
||||
alerts: DEFAULT_AAD_CONFIG,
|
||||
alerts: TRANSFORM_HEALTH_AAD_CONFIG,
|
||||
async executor(options) {
|
||||
const {
|
||||
services: { scopedClusterClient, alertsClient, uiSettingsClient },
|
||||
|
@ -153,6 +187,7 @@ export function getTransformHealthRuleType(
|
|||
context,
|
||||
payload: {
|
||||
[ALERT_REASON]: context.message,
|
||||
[TRANSFORM_HEALTH_RESULTS]: context.results,
|
||||
},
|
||||
});
|
||||
});
|
||||
|
@ -168,6 +203,7 @@ export function getTransformHealthRuleType(
|
|||
context: testResult.context,
|
||||
payload: {
|
||||
[ALERT_REASON]: testResult.context.message,
|
||||
[TRANSFORM_HEALTH_RESULTS]: testResult.context.results,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
|
|
@ -154,6 +154,7 @@ export function transformHealthServiceProvider({
|
|||
},
|
||||
/**
|
||||
* Returns report about transforms that contain error messages
|
||||
* @deprecated This health check is no longer in use
|
||||
* @param transformIds
|
||||
*/
|
||||
async getErrorMessagesReport(
|
||||
|
|
|
@ -19,6 +19,7 @@ import {
|
|||
ALERT_STATUS,
|
||||
EVENT_ACTION,
|
||||
} from '@kbn/rule-data-utils';
|
||||
import { TRANSFORM_HEALTH_RESULTS } from '@kbn/transform-plugin/common/constants';
|
||||
import { FtrProviderContext } from '../../../../../../common/ftr_provider_context';
|
||||
import { getUrlPrefix, ObjectRemover } from '../../../../../../common/lib';
|
||||
import { Spaces } from '../../../../../scenarios';
|
||||
|
@ -76,7 +77,7 @@ export default function ruleTests({ getService }: FtrProviderContext) {
|
|||
const esTestIndexToolAAD = new ESTestIndexTool(
|
||||
es,
|
||||
retry,
|
||||
`.internal.alerts-default.alerts-default-000001`
|
||||
`.internal.alerts-transform.health.alerts-default-000001`
|
||||
);
|
||||
|
||||
describe('rule', async () => {
|
||||
|
@ -132,6 +133,9 @@ export default function ruleTests({ getService }: FtrProviderContext) {
|
|||
const aadDocs = await getAllAADDocs(1);
|
||||
const alertDoc = aadDocs.body.hits.hits[0]._source;
|
||||
expect(alertDoc[ALERT_REASON]).to.be(`Transform test_transform_01 is not started.`);
|
||||
expect(alertDoc[TRANSFORM_HEALTH_RESULTS]).to.eql([
|
||||
{ transform_id: 'test_transform_01', transform_state: 'stopped', health_status: 'green' },
|
||||
]);
|
||||
expect(alertDoc[ALERT_RULE_CATEGORY]).to.be(`Transform health`);
|
||||
expect(alertDoc[ALERT_RULE_NAME]).to.be(`Test all transforms`);
|
||||
expect(alertDoc[ALERT_RULE_TYPE_ID]).to.be(`transform_health`);
|
||||
|
|
|
@ -133,7 +133,7 @@ export default function ({ getService }: FtrProviderContext) {
|
|||
|
||||
await new Promise((resolve) => setTimeout(resolve, 3000));
|
||||
|
||||
await retry.tryForTime(10000, async () => {
|
||||
await retry.tryForTime(15000, async () => {
|
||||
const resp2 = await supertest
|
||||
.post(`/internal/search/ese/${id}`)
|
||||
.set(ELASTIC_HTTP_VERSION_HEADER, '1')
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue