mirror of
https://github.com/elastic/kibana.git
synced 2025-04-24 09:48:58 -04:00
Add context.originalAlertState to the Metric Threshold and Inventory Threshold recovery context (#147928)
## Summary

This PR adds the `ALERT_ACTION_GROUP` to the Alerts-As-Data documents for both the Metric Threshold and Inventory Threshold rules. It then uses that value from the alert document in the recovery context to set `context.originalAlertState`. This also adds `context.originalAlertStateWasALERT`, `context.originalAlertStateWasWARNING`, and `context.originalAlertStateWasNO_DATA` (the last one for Metric Threshold only) to allow for conditional Mustache templates. I also fixed the types for `getAlertByAlertUuid()` to be more accurate.

#### Metric Threshold Example

```
{{#context.originalAlertStateWasALERT}} This is a recovery for an ALERT {{/context.originalAlertStateWasALERT}}
{{#context.originalAlertStateWasWARNING}} This is a recovery for a WARNING {{/context.originalAlertStateWasWARNING}}
{{#context.originalAlertStateWasNO_DATA}} This is a recovery for NO_DATA {{/context.originalAlertStateWasNO_DATA}}
```

#### Inventory Threshold Example

```
{{#context.originalAlertStateWasALERT}} This is a recovery for an ALERT {{/context.originalAlertStateWasALERT}}
{{#context.originalAlertStateWasWARNING}} This is a recovery for a WARNING {{/context.originalAlertStateWasWARNING}}
```

Fixes #145418

### How to test

1. Start Kibana and ingest some data (Metricbeat or whatever).
2. Create one rule of each type; for the Metric Threshold rule you will need to group by something like `host.name`.
3. Set the conditions to something you can trigger; I used `NO_DATA`.
4. Add a server log action for the recovery action group with `{{context}}`. Alternatively, you can use the examples above to see the Mustache logic work.
5. Save the rules.
6. Stop ingesting data and allow the rule to trigger a `NO DATA` alert.
7. Start ingesting data so that it recovers.
8. Observe the log message with `originalAlertState` as `NO DATA` for Metric Threshold and `ALERT` for Inventory Threshold.
This commit is contained in:
parent
a1251a93c2
commit
0d6c113ab1
9 changed files with 142 additions and 20 deletions
|
@ -271,3 +271,19 @@ export const tagsActionVariableDescription = i18n.translate(
|
|||
defaultMessage: 'List of tags associated with the entity where this alert triggered.',
|
||||
}
|
||||
);
|
||||
|
||||
/**
 * Translated description for the `originalAlertState` action variable.
 * The id and default message are kept as inline literals in the
 * `i18n.translate` call.
 */
export const originalAlertStateActionVariableDescription = i18n.translate(
  'xpack.infra.metrics.alerting.originalAlertStateActionVariableDescription',
  {
    defaultMessage:
      'The state of the alert before it recovered. This is only available in the recovery context.',
  }
);
|
||||
|
||||
/**
 * Translated description shared by the boolean `originalAlertStateWas*`
 * action variables. Because one string serves several states, the
 * translation id is state-neutral and mirrors the constant name.
 */
export const originalAlertStateWasActionVariableDescription = i18n.translate(
  'xpack.infra.metrics.alerting.originalAlertStateWasActionVariableDescription',
  {
    defaultMessage:
      'Boolean value of the state of the alert before it recovered. This can be used for template conditions. This is only available in the recovery context.',
  }
);
|
||||
|
|
|
@ -237,7 +237,7 @@ export const flattenAdditionalContext = (
|
|||
};
|
||||
|
||||
export const getContextForRecoveredAlerts = (
|
||||
alertHits: AdditionalContext | undefined | null
|
||||
alertHits: AdditionalContext[] | undefined | null
|
||||
): AdditionalContext => {
|
||||
const alertHitsSource =
|
||||
alertHits && alertHits.length > 0 ? unflattenObject(alertHits[0]._source) : undefined;
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
*/
|
||||
|
||||
import { i18n } from '@kbn/i18n';
|
||||
import { ALERT_REASON } from '@kbn/rule-data-utils';
|
||||
import { ALERT_REASON, ALERT_ACTION_GROUP } from '@kbn/rule-data-utils';
|
||||
import { first, get } from 'lodash';
|
||||
import {
|
||||
ActionGroup,
|
||||
|
@ -15,6 +15,7 @@ import {
|
|||
AlertInstanceState as AlertState,
|
||||
} from '@kbn/alerting-plugin/common';
|
||||
import { Alert, RuleTypeState } from '@kbn/alerting-plugin/server';
|
||||
import { getOriginalActionGroup } from '../../../utils/get_original_action_group';
|
||||
import { AlertStates, InventoryMetricThresholdParams } from '../../../../common/alerting/metrics';
|
||||
import { createFormatter } from '../../../../common/formatters';
|
||||
import { getCustomMetricLabel } from '../../../../common/formatters/get_custom_metric_label';
|
||||
|
@ -45,6 +46,11 @@ type InventoryMetricThresholdAllowedActionGroups = ActionGroupIdsOf<
|
|||
typeof FIRED_ACTIONS | typeof WARNING_ACTIONS
|
||||
>;
|
||||
|
||||
export const FIRED_ACTIONS_ID = 'metrics.inventory_threshold.fired';
|
||||
export const WARNING_ACTIONS_ID = 'metrics.inventory_threshold.warning';
|
||||
|
||||
type InventoryThrehsoldActionGroup = typeof FIRED_ACTIONS_ID | typeof WARNING_ACTIONS_ID;
|
||||
|
||||
export type InventoryMetricThresholdRuleTypeState = RuleTypeState; // no specific state used
|
||||
export type InventoryMetricThresholdAlertState = AlertState; // no specific state used
|
||||
export type InventoryMetricThresholdAlertContext = AlertContext; // no specific instance context used
|
||||
|
@ -57,6 +63,7 @@ type InventoryMetricThresholdAlert = Alert<
|
|||
type InventoryMetricThresholdAlertFactory = (
|
||||
id: string,
|
||||
reason: string,
|
||||
actionGroup: InventoryThrehsoldActionGroup,
|
||||
additionalContext?: AdditionalContext | null,
|
||||
threshold?: number | undefined,
|
||||
value?: number | undefined
|
||||
|
@ -90,11 +97,17 @@ export const createInventoryMetricThresholdExecutor = (libs: InfraBackendLibs) =
|
|||
getAlertUuid,
|
||||
getAlertByAlertUuid,
|
||||
} = services;
|
||||
const alertFactory: InventoryMetricThresholdAlertFactory = (id, reason, additionalContext) =>
|
||||
const alertFactory: InventoryMetricThresholdAlertFactory = (
|
||||
id,
|
||||
reason,
|
||||
actionGroup,
|
||||
additionalContext
|
||||
) =>
|
||||
alertWithLifecycle({
|
||||
id,
|
||||
fields: {
|
||||
[ALERT_REASON]: reason,
|
||||
[ALERT_ACTION_GROUP]: actionGroup,
|
||||
...flattenAdditionalContext(additionalContext),
|
||||
},
|
||||
});
|
||||
|
@ -107,7 +120,7 @@ export const createInventoryMetricThresholdExecutor = (libs: InfraBackendLibs) =
|
|||
logger.error(e.message);
|
||||
const actionGroupId = FIRED_ACTIONS.id; // Change this to an Error action group when able
|
||||
const reason = buildInvalidQueryAlertReason(params.filterQueryText);
|
||||
const alert = alertFactory(UNGROUPED_FACTORY_KEY, reason);
|
||||
const alert = alertFactory(UNGROUPED_FACTORY_KEY, reason, actionGroupId);
|
||||
const indexedStartedDate =
|
||||
getAlertStartedDate(UNGROUPED_FACTORY_KEY) ?? startedAt.toISOString();
|
||||
const alertUuid = getAlertUuid(UNGROUPED_FACTORY_KEY);
|
||||
|
@ -212,11 +225,11 @@ export const createInventoryMetricThresholdExecutor = (libs: InfraBackendLibs) =
|
|||
}
|
||||
if (reason) {
|
||||
const actionGroupId =
|
||||
nextState === AlertStates.WARNING ? WARNING_ACTIONS.id : FIRED_ACTIONS.id;
|
||||
nextState === AlertStates.WARNING ? WARNING_ACTIONS_ID : FIRED_ACTIONS_ID;
|
||||
|
||||
const additionalContext = results && results.length > 0 ? results[0][group].context : null;
|
||||
|
||||
const alert = alertFactory(group, reason, additionalContext);
|
||||
const alert = alertFactory(group, reason, actionGroupId, additionalContext);
|
||||
const indexedStartedDate = getAlertStartedDate(group) ?? startedAt.toISOString();
|
||||
const alertUuid = getAlertUuid(group);
|
||||
|
||||
|
@ -255,6 +268,7 @@ export const createInventoryMetricThresholdExecutor = (libs: InfraBackendLibs) =
|
|||
const alertUuid = getAlertUuid(recoveredAlertId);
|
||||
const alertHits = alertUuid ? await getAlertByAlertUuid(alertUuid) : undefined;
|
||||
const additionalContext = getContextForRecoveredAlerts(alertHits);
|
||||
const originalActionGroup = getOriginalActionGroup(alertHits);
|
||||
|
||||
alert.setContext({
|
||||
alertDetailsUrl: getAlertDetailsUrl(libs.basePath, spaceId, alertUuid),
|
||||
|
@ -270,6 +284,9 @@ export const createInventoryMetricThresholdExecutor = (libs: InfraBackendLibs) =
|
|||
timestamp: indexedStartedDate,
|
||||
spaceId,
|
||||
}),
|
||||
originalAlertState: translateActionGroupToAlertState(originalActionGroup),
|
||||
originalAlertStateWasALERT: originalActionGroup === FIRED_ACTIONS_ID,
|
||||
originalAlertStateWasWARNING: originalActionGroup === WARNING_ACTIONS_ID,
|
||||
...additionalContext,
|
||||
});
|
||||
}
|
||||
|
@ -335,14 +352,12 @@ const mapToConditionsLookup = (
|
|||
{}
|
||||
);
|
||||
|
||||
export const FIRED_ACTIONS_ID = 'metrics.inventory_threshold.fired';
|
||||
export const FIRED_ACTIONS: ActionGroup<typeof FIRED_ACTIONS_ID> = {
|
||||
id: FIRED_ACTIONS_ID,
|
||||
name: i18n.translate('xpack.infra.metrics.alerting.inventory.threshold.fired', {
|
||||
defaultMessage: 'Alert',
|
||||
}),
|
||||
};
|
||||
export const WARNING_ACTIONS_ID = 'metrics.inventory_threshold.warning';
|
||||
export const WARNING_ACTIONS = {
|
||||
id: WARNING_ACTIONS_ID,
|
||||
name: i18n.translate('xpack.infra.metrics.alerting.threshold.warning', {
|
||||
|
@ -350,6 +365,17 @@ export const WARNING_ACTIONS = {
|
|||
}),
|
||||
};
|
||||
|
||||
/**
 * Converts an action group id into the matching `stateToAlertMessage` entry.
 *
 * @param actionGroupId - action group id to translate; may be undefined
 * @returns the ALERT or WARNING message for the fired / warning action group,
 *          or `undefined` for any other value
 */
const translateActionGroupToAlertState = (
  actionGroupId: string | undefined
): string | undefined => {
  switch (actionGroupId) {
    case FIRED_ACTIONS.id:
      return stateToAlertMessage[AlertStates.ALERT];
    case WARNING_ACTIONS.id:
      return stateToAlertMessage[AlertStates.WARNING];
    default:
      // Unknown or missing action group: nothing to translate.
      return undefined;
  }
};
|
||||
|
||||
const formatMetric = (metric: SnapshotMetricType, value: number) => {
|
||||
const metricFormatter = get(METRIC_FORMATTERS, metric, METRIC_FORMATTERS.count);
|
||||
if (isNaN(value)) {
|
||||
|
|
|
@ -33,6 +33,8 @@ import {
|
|||
labelsActionVariableDescription,
|
||||
metricActionVariableDescription,
|
||||
orchestratorActionVariableDescription,
|
||||
originalAlertStateActionVariableDescription,
|
||||
originalAlertStateWasActionVariableDescription,
|
||||
reasonActionVariableDescription,
|
||||
tagsActionVariableDescription,
|
||||
thresholdActionVariableDescription,
|
||||
|
@ -124,6 +126,15 @@ export async function registerMetricInventoryThresholdRuleType(
|
|||
{ name: 'orchestrator', description: orchestratorActionVariableDescription },
|
||||
{ name: 'labels', description: labelsActionVariableDescription },
|
||||
{ name: 'tags', description: tagsActionVariableDescription },
|
||||
{ name: 'originalAlertState', description: originalAlertStateActionVariableDescription },
|
||||
{
|
||||
name: 'originalAlertStateWasALERT',
|
||||
description: originalAlertStateWasActionVariableDescription,
|
||||
},
|
||||
{
|
||||
name: 'originalAlertStateWasWARNING',
|
||||
description: originalAlertStateWasActionVariableDescription,
|
||||
},
|
||||
],
|
||||
},
|
||||
getSummarizedAlerts: libs.metricsRules.createGetSummarizedAlerts(),
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
*/
|
||||
|
||||
import { i18n } from '@kbn/i18n';
|
||||
import { ALERT_REASON } from '@kbn/rule-data-utils';
|
||||
import { ALERT_ACTION_GROUP, ALERT_REASON } from '@kbn/rule-data-utils';
|
||||
import { isEqual } from 'lodash';
|
||||
import {
|
||||
ActionGroupIdsOf,
|
||||
|
@ -16,6 +16,7 @@ import {
|
|||
} from '@kbn/alerting-plugin/common';
|
||||
import { Alert, RuleTypeState } from '@kbn/alerting-plugin/server';
|
||||
import { TimeUnitChar } from '@kbn/observability-plugin/common/utils/formatters/duration';
|
||||
import { getOriginalActionGroup } from '../../../utils/get_original_action_group';
|
||||
import { AlertStates, Comparator } from '../../../../common/alerting/metrics';
|
||||
import { createFormatter } from '../../../../common/formatters';
|
||||
import { InfraBackendLibs } from '../../infra_types';
|
||||
|
@ -53,8 +54,18 @@ export type MetricThresholdRuleTypeState = RuleTypeState & {
|
|||
export type MetricThresholdAlertState = AlertState; // no specific instace state used
|
||||
export type MetricThresholdAlertContext = AlertContext; // no specific instace state used
|
||||
|
||||
export const FIRED_ACTIONS_ID = 'metrics.threshold.fired';
|
||||
export const WARNING_ACTIONS_ID = 'metrics.threshold.warning';
|
||||
export const NO_DATA_ACTIONS_ID = 'metrics.threshold.nodata';
|
||||
|
||||
type MetricThresholdActionGroup =
|
||||
| typeof FIRED_ACTIONS_ID
|
||||
| typeof WARNING_ACTIONS_ID
|
||||
| typeof NO_DATA_ACTIONS_ID
|
||||
| typeof RecoveredActionGroup.id;
|
||||
|
||||
type MetricThresholdAllowedActionGroups = ActionGroupIdsOf<
|
||||
typeof FIRED_ACTIONS | typeof WARNING_ACTIONS
|
||||
typeof FIRED_ACTIONS | typeof WARNING_ACTIONS | typeof NO_DATA_ACTIONS
|
||||
>;
|
||||
|
||||
type MetricThresholdAlert = Alert<
|
||||
|
@ -66,6 +77,7 @@ type MetricThresholdAlert = Alert<
|
|||
type MetricThresholdAlertFactory = (
|
||||
id: string,
|
||||
reason: string,
|
||||
actionGroup: MetricThresholdActionGroup,
|
||||
additionalContext?: AdditionalContext | null,
|
||||
threshold?: number | undefined,
|
||||
value?: number | undefined
|
||||
|
@ -101,11 +113,17 @@ export const createMetricThresholdExecutor = (libs: InfraBackendLibs) =>
|
|||
|
||||
const { alertWithLifecycle, savedObjectsClient, getAlertUuid, getAlertByAlertUuid } = services;
|
||||
|
||||
const alertFactory: MetricThresholdAlertFactory = (id, reason, additionalContext) =>
|
||||
const alertFactory: MetricThresholdAlertFactory = (
|
||||
id,
|
||||
reason,
|
||||
actionGroup,
|
||||
additionalContext
|
||||
) =>
|
||||
alertWithLifecycle({
|
||||
id,
|
||||
fields: {
|
||||
[ALERT_REASON]: reason,
|
||||
[ALERT_ACTION_GROUP]: actionGroup,
|
||||
...flattenAdditionalContext(additionalContext),
|
||||
},
|
||||
});
|
||||
|
@ -127,9 +145,9 @@ export const createMetricThresholdExecutor = (libs: InfraBackendLibs) =>
|
|||
} catch (e) {
|
||||
logger.error(e.message);
|
||||
const timestamp = startedAt.toISOString();
|
||||
const actionGroupId = FIRED_ACTIONS.id; // Change this to an Error action group when able
|
||||
const actionGroupId = FIRED_ACTIONS_ID; // Change this to an Error action group when able
|
||||
const reason = buildInvalidQueryAlertReason(params.filterQueryText);
|
||||
const alert = alertFactory(UNGROUPED_FACTORY_KEY, reason);
|
||||
const alert = alertFactory(UNGROUPED_FACTORY_KEY, reason, actionGroupId);
|
||||
const alertUuid = getAlertUuid(UNGROUPED_FACTORY_KEY);
|
||||
|
||||
alert.scheduleActions(actionGroupId, {
|
||||
|
@ -258,14 +276,14 @@ export const createMetricThresholdExecutor = (libs: InfraBackendLibs) =>
|
|||
|
||||
if (reason) {
|
||||
const timestamp = startedAt.toISOString();
|
||||
const actionGroupId =
|
||||
const actionGroupId: MetricThresholdActionGroup =
|
||||
nextState === AlertStates.OK
|
||||
? RecoveredActionGroup.id
|
||||
: nextState === AlertStates.NO_DATA
|
||||
? NO_DATA_ACTIONS.id
|
||||
? NO_DATA_ACTIONS_ID
|
||||
: nextState === AlertStates.WARNING
|
||||
? WARNING_ACTIONS.id
|
||||
: FIRED_ACTIONS.id;
|
||||
? WARNING_ACTIONS_ID
|
||||
: FIRED_ACTIONS_ID;
|
||||
|
||||
const additionalContext = hasAdditionalContext(params.groupBy, validGroupByForContext)
|
||||
? alertResults && alertResults.length > 0
|
||||
|
@ -273,7 +291,7 @@ export const createMetricThresholdExecutor = (libs: InfraBackendLibs) =>
|
|||
: null
|
||||
: null;
|
||||
|
||||
const alert = alertFactory(`${group}`, reason, additionalContext);
|
||||
const alert = alertFactory(`${group}`, reason, actionGroupId, additionalContext);
|
||||
const alertUuid = getAlertUuid(group);
|
||||
scheduledActionsCount++;
|
||||
|
||||
|
@ -313,6 +331,7 @@ export const createMetricThresholdExecutor = (libs: InfraBackendLibs) =>
|
|||
|
||||
const alertHits = alertUuid ? await getAlertByAlertUuid(alertUuid) : undefined;
|
||||
const additionalContext = getContextForRecoveredAlerts(alertHits);
|
||||
const originalActionGroup = getOriginalActionGroup(alertHits);
|
||||
|
||||
alert.setContext({
|
||||
alertDetailsUrl: getAlertDetailsUrl(libs.basePath, spaceId, alertUuid),
|
||||
|
@ -323,6 +342,12 @@ export const createMetricThresholdExecutor = (libs: InfraBackendLibs) =>
|
|||
timestamp: startedAt.toISOString(),
|
||||
threshold: mapToConditionsLookup(criteria, (c) => c.threshold),
|
||||
viewInAppUrl: getViewInMetricsAppUrl(libs.basePath, spaceId),
|
||||
|
||||
originalAlertState: translateActionGroupToAlertState(originalActionGroup),
|
||||
originalAlertStateWasALERT: originalActionGroup === FIRED_ACTIONS.id,
|
||||
originalAlertStateWasWARNING: originalActionGroup === WARNING_ACTIONS.id,
|
||||
// eslint-disable-next-line @typescript-eslint/naming-convention
|
||||
originalAlertStateWasNO_DATA: originalActionGroup === NO_DATA_ACTIONS.id,
|
||||
...additionalContext,
|
||||
});
|
||||
}
|
||||
|
@ -360,6 +385,20 @@ export const NO_DATA_ACTIONS = {
|
|||
}),
|
||||
};
|
||||
|
||||
/**
 * Converts an action group id into the matching `stateToAlertMessage` entry.
 *
 * @param actionGroupId - action group id to translate; may be undefined
 * @returns the ALERT, WARNING or NO_DATA message for the fired / warning /
 *          no-data action group, or `undefined` for any other value
 */
const translateActionGroupToAlertState = (
  actionGroupId: string | undefined
): string | undefined => {
  switch (actionGroupId) {
    case FIRED_ACTIONS.id:
      return stateToAlertMessage[AlertStates.ALERT];
    case WARNING_ACTIONS.id:
      return stateToAlertMessage[AlertStates.WARNING];
    case NO_DATA_ACTIONS.id:
      return stateToAlertMessage[AlertStates.NO_DATA];
    default:
      // Unknown or missing action group: nothing to translate.
      return undefined;
  }
};
|
||||
|
||||
const mapToConditionsLookup = (
|
||||
list: any[],
|
||||
mapFn: (value: any, index: number, array: any[]) => unknown
|
||||
|
|
|
@ -23,6 +23,8 @@ import {
|
|||
labelsActionVariableDescription,
|
||||
metricActionVariableDescription,
|
||||
orchestratorActionVariableDescription,
|
||||
originalAlertStateActionVariableDescription,
|
||||
originalAlertStateWasActionVariableDescription,
|
||||
reasonActionVariableDescription,
|
||||
tagsActionVariableDescription,
|
||||
thresholdActionVariableDescription,
|
||||
|
@ -124,6 +126,19 @@ export async function registerMetricThresholdRuleType(
|
|||
{ name: 'orchestrator', description: orchestratorActionVariableDescription },
|
||||
{ name: 'labels', description: labelsActionVariableDescription },
|
||||
{ name: 'tags', description: tagsActionVariableDescription },
|
||||
{ name: 'originalAlertState', description: originalAlertStateActionVariableDescription },
|
||||
{
|
||||
name: 'originalAlertStateWasALERT',
|
||||
description: originalAlertStateWasActionVariableDescription,
|
||||
},
|
||||
{
|
||||
name: 'originalAlertStateWasWARNING',
|
||||
description: originalAlertStateWasActionVariableDescription,
|
||||
},
|
||||
{
|
||||
name: 'originalAlertStateWasNO_DATA',
|
||||
description: originalAlertStateWasActionVariableDescription,
|
||||
},
|
||||
],
|
||||
},
|
||||
producer: 'infrastructure',
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import { ALERT_ACTION_GROUP } from '@kbn/rule-data-utils';
|
||||
|
||||
/**
 * Reads the action group stored on the first alert hit.
 *
 * @param alertHits - alert document hits; may be empty, `null` or `undefined`
 * @returns the value found under `ALERT_ACTION_GROUP` in the first hit's
 *          `_source` when it is a string; `undefined` when there are no hits,
 *          the first hit has no `_source`, or the stored value is not a string
 */
export const getOriginalActionGroup = (
  alertHits: Array<{ [id: string]: any }> | null | undefined
): string | undefined => {
  // Hits are loosely typed, so treat the looked-up value as unknown and narrow
  // it instead of letting `any` leak out through the return type.
  const actionGroup: unknown = alertHits?.[0]?._source?.[ALERT_ACTION_GROUP];
  return typeof actionGroup === 'string' ? actionGroup : undefined;
};
|
|
@ -75,7 +75,7 @@ export interface LifecycleAlertServices<
|
|||
alertWithLifecycle: LifecycleAlertService<InstanceState, InstanceContext, ActionGroupIds>;
|
||||
getAlertStartedDate: (alertInstanceId: string) => string | null;
|
||||
getAlertUuid: (alertInstanceId: string) => string;
|
||||
getAlertByAlertUuid: (alertUuid: string) => { [x: string]: any } | null;
|
||||
getAlertByAlertUuid: (alertUuid: string) => Promise<Array<{ [id: string]: any }> | null>;
|
||||
}
|
||||
|
||||
export type LifecycleRuleExecutor<
|
||||
|
|
|
@ -37,5 +37,5 @@ export const createLifecycleAlertServicesMock = <
|
|||
alertWithLifecycle: ({ id }) => alertServices.alertFactory.create(id),
|
||||
getAlertStartedDate: jest.fn((id: string) => null),
|
||||
getAlertUuid: jest.fn((id: string) => 'mock-alert-uuid'),
|
||||
getAlertByAlertUuid: jest.fn((id: string) => null),
|
||||
getAlertByAlertUuid: jest.fn((id: string) => Promise.resolve(null)),
|
||||
});
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue