Add context.originalAlertState to the Metric Threshold and Inventory Threshold recovery context (#147928)

## Summary

This PR adds the `ALERT_ACTION_GROUP` to the Alerts-As-Data documents
for both the Metric Threshold and Inventory Threshold rules. It then
uses that value from the alert document in the recovery context to set
`context.originalAlertState`. This also adds
`context.originalAlertStateWasALERT`, `context.originalAlertStateWasWARNING`, and
`context.originalAlertStateWasNO_DATA` (Metric Threshold only) to allow for
conditional Mustache templates. I also fixed the types for
`getAlertByAlertUuid()` to be more accurate.

#### Metric Threshold Example
```
{{#context.originalAlertStateWasALERT}}
This is a recovery for an ALERT
{{/context.originalAlertStateWasALERT}}

{{#context.originalAlertStateWasWARNING}}
This is a recovery for a WARNING
{{/context.originalAlertStateWasWARNING}}

{{#context.originalAlertStateWasNO_DATA}}
This is a recovery for NO_DATA
{{/context.originalAlertStateWasNO_DATA}}
```

#### Inventory Threshold Example
```
{{#context.originalAlertStateWasALERT}}
This is a recovery for an ALERT
{{/context.originalAlertStateWasALERT}}

{{#context.originalAlertStateWasWARNING}}
This is a recovery for a WARNING
{{/context.originalAlertStateWasWARNING}}
```

Fixes #145418

### How to test

1. Start Kibana and ingest some data (Metricbeat or whatever)
2. Create one rule of each type (Metric Threshold and Inventory Threshold).
For the Metric Threshold rule you will need to group by something like `host.name`.
3. Set the conditions to something you can trigger; I used `NO_DATA`.
4. Add a server log action for the recovery action group with
`{{context}}`. Alternatively, you can use the examples above to see the
Mustache logic work.
5. Save the rules
6. Stop ingesting data and allow the rule to trigger a `NO DATA` alert
7. Start ingesting data so that it recovers
8. Observe the log message with `originalAlertState` as `NO DATA` for
Metric Threshold and `ALERT` for Inventory Threshold.
This commit is contained in:
Chris Cowan 2023-02-01 07:36:59 -07:00 committed by GitHub
parent a1251a93c2
commit 0d6c113ab1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 142 additions and 20 deletions

View file

@ -271,3 +271,19 @@ export const tagsActionVariableDescription = i18n.translate(
defaultMessage: 'List of tags associated with the entity where this alert triggered.',
}
);
/**
 * Translated description shown for the `originalAlertState` action variable.
 * The message explains that the variable holds the state the alert had before
 * it recovered and that it is only populated in the recovery context.
 */
export const originalAlertStateActionVariableDescription = i18n.translate(
'xpack.infra.metrics.alerting.originalAlertStateActionVariableDescription',
{
defaultMessage:
'The state of the alert before it recovered. This is only available in the recovery context',
}
);
/**
 * Translated description shared by the boolean `originalAlertStateWas*` action
 * variables (registered for ALERT, WARNING and, for Metric Threshold, NO_DATA).
 * The message explains that the value is a boolean intended for template
 * conditions and is only populated in the recovery context.
 *
 * NOTE(review): the translation key below names WARNING specifically although
 * the description is reused for several variables — confirm whether a generic
 * key was intended before changing it, since existing translations key off it.
 */
export const originalAlertStateWasActionVariableDescription = i18n.translate(
'xpack.infra.metrics.alerting.originalAlertStateWasWARNINGActionVariableDescription',
{
defaultMessage:
'Boolean value of the state of the alert before it recovered. This can be used for template conditions. This is only available in the recovery context',
}
);

View file

@ -237,7 +237,7 @@ export const flattenAdditionalContext = (
};
export const getContextForRecoveredAlerts = (
alertHits: AdditionalContext | undefined | null
alertHits: AdditionalContext[] | undefined | null
): AdditionalContext => {
const alertHitsSource =
alertHits && alertHits.length > 0 ? unflattenObject(alertHits[0]._source) : undefined;

View file

@ -6,7 +6,7 @@
*/
import { i18n } from '@kbn/i18n';
import { ALERT_REASON } from '@kbn/rule-data-utils';
import { ALERT_REASON, ALERT_ACTION_GROUP } from '@kbn/rule-data-utils';
import { first, get } from 'lodash';
import {
ActionGroup,
@ -15,6 +15,7 @@ import {
AlertInstanceState as AlertState,
} from '@kbn/alerting-plugin/common';
import { Alert, RuleTypeState } from '@kbn/alerting-plugin/server';
import { getOriginalActionGroup } from '../../../utils/get_original_action_group';
import { AlertStates, InventoryMetricThresholdParams } from '../../../../common/alerting/metrics';
import { createFormatter } from '../../../../common/formatters';
import { getCustomMetricLabel } from '../../../../common/formatters/get_custom_metric_label';
@ -45,6 +46,11 @@ type InventoryMetricThresholdAllowedActionGroups = ActionGroupIdsOf<
typeof FIRED_ACTIONS | typeof WARNING_ACTIONS
>;
// Action group ids for the Inventory Threshold rule, declared as plain string
// constants so that their literal types can be combined into the union below.
export const FIRED_ACTIONS_ID = 'metrics.inventory_threshold.fired';
export const WARNING_ACTIONS_ID = 'metrics.inventory_threshold.warning';
// Union of the action group ids that the alert factory accepts as `actionGroup`.
// NOTE(review): "Threhsold" looks like a typo for "Threshold"; a rename must
// update every reference to this type at the same time.
type InventoryThrehsoldActionGroup = typeof FIRED_ACTIONS_ID | typeof WARNING_ACTIONS_ID;
export type InventoryMetricThresholdRuleTypeState = RuleTypeState; // no specific state used
export type InventoryMetricThresholdAlertState = AlertState; // no specific state used
export type InventoryMetricThresholdAlertContext = AlertContext; // no specific instance context used
@ -57,6 +63,7 @@ type InventoryMetricThresholdAlert = Alert<
type InventoryMetricThresholdAlertFactory = (
id: string,
reason: string,
actionGroup: InventoryThrehsoldActionGroup,
additionalContext?: AdditionalContext | null,
threshold?: number | undefined,
value?: number | undefined
@ -90,11 +97,17 @@ export const createInventoryMetricThresholdExecutor = (libs: InfraBackendLibs) =
getAlertUuid,
getAlertByAlertUuid,
} = services;
const alertFactory: InventoryMetricThresholdAlertFactory = (id, reason, additionalContext) =>
const alertFactory: InventoryMetricThresholdAlertFactory = (
id,
reason,
actionGroup,
additionalContext
) =>
alertWithLifecycle({
id,
fields: {
[ALERT_REASON]: reason,
[ALERT_ACTION_GROUP]: actionGroup,
...flattenAdditionalContext(additionalContext),
},
});
@ -107,7 +120,7 @@ export const createInventoryMetricThresholdExecutor = (libs: InfraBackendLibs) =
logger.error(e.message);
const actionGroupId = FIRED_ACTIONS.id; // Change this to an Error action group when able
const reason = buildInvalidQueryAlertReason(params.filterQueryText);
const alert = alertFactory(UNGROUPED_FACTORY_KEY, reason);
const alert = alertFactory(UNGROUPED_FACTORY_KEY, reason, actionGroupId);
const indexedStartedDate =
getAlertStartedDate(UNGROUPED_FACTORY_KEY) ?? startedAt.toISOString();
const alertUuid = getAlertUuid(UNGROUPED_FACTORY_KEY);
@ -212,11 +225,11 @@ export const createInventoryMetricThresholdExecutor = (libs: InfraBackendLibs) =
}
if (reason) {
const actionGroupId =
nextState === AlertStates.WARNING ? WARNING_ACTIONS.id : FIRED_ACTIONS.id;
nextState === AlertStates.WARNING ? WARNING_ACTIONS_ID : FIRED_ACTIONS_ID;
const additionalContext = results && results.length > 0 ? results[0][group].context : null;
const alert = alertFactory(group, reason, additionalContext);
const alert = alertFactory(group, reason, actionGroupId, additionalContext);
const indexedStartedDate = getAlertStartedDate(group) ?? startedAt.toISOString();
const alertUuid = getAlertUuid(group);
@ -255,6 +268,7 @@ export const createInventoryMetricThresholdExecutor = (libs: InfraBackendLibs) =
const alertUuid = getAlertUuid(recoveredAlertId);
const alertHits = alertUuid ? await getAlertByAlertUuid(alertUuid) : undefined;
const additionalContext = getContextForRecoveredAlerts(alertHits);
const originalActionGroup = getOriginalActionGroup(alertHits);
alert.setContext({
alertDetailsUrl: getAlertDetailsUrl(libs.basePath, spaceId, alertUuid),
@ -270,6 +284,9 @@ export const createInventoryMetricThresholdExecutor = (libs: InfraBackendLibs) =
timestamp: indexedStartedDate,
spaceId,
}),
originalAlertState: translateActionGroupToAlertState(originalActionGroup),
originalAlertStateWasALERT: originalActionGroup === FIRED_ACTIONS_ID,
originalAlertStateWasWARNING: originalActionGroup === WARNING_ACTIONS_ID,
...additionalContext,
});
}
@ -335,14 +352,12 @@ const mapToConditionsLookup = (
{}
);
export const FIRED_ACTIONS_ID = 'metrics.inventory_threshold.fired';
/**
 * Action group descriptor for the "fired" state of the Inventory Threshold
 * rule. Its id is `FIRED_ACTIONS_ID` and its display name is the translated
 * label 'Alert'.
 */
export const FIRED_ACTIONS: ActionGroup<typeof FIRED_ACTIONS_ID> = {
id: FIRED_ACTIONS_ID,
name: i18n.translate('xpack.infra.metrics.alerting.inventory.threshold.fired', {
defaultMessage: 'Alert',
}),
};
export const WARNING_ACTIONS_ID = 'metrics.inventory_threshold.warning';
export const WARNING_ACTIONS = {
id: WARNING_ACTIONS_ID,
name: i18n.translate('xpack.infra.metrics.alerting.threshold.warning', {
@ -350,6 +365,17 @@ export const WARNING_ACTIONS = {
}),
};
/**
 * Maps an action group id to the corresponding entry of `stateToAlertMessage`.
 *
 * @param actionGroupId - Action group id read back from the alert document, if any.
 * @returns The message for the ALERT or WARNING state when the id matches the
 *   fired or warning action group; `undefined` for any other value.
 */
const translateActionGroupToAlertState = (
  actionGroupId: string | undefined
): string | undefined => {
  switch (actionGroupId) {
    case FIRED_ACTIONS.id:
      return stateToAlertMessage[AlertStates.ALERT];
    case WARNING_ACTIONS.id:
      return stateToAlertMessage[AlertStates.WARNING];
    default:
      // Unknown or missing action group: no state can be reported.
      return undefined;
  }
};
const formatMetric = (metric: SnapshotMetricType, value: number) => {
const metricFormatter = get(METRIC_FORMATTERS, metric, METRIC_FORMATTERS.count);
if (isNaN(value)) {

View file

@ -33,6 +33,8 @@ import {
labelsActionVariableDescription,
metricActionVariableDescription,
orchestratorActionVariableDescription,
originalAlertStateActionVariableDescription,
originalAlertStateWasActionVariableDescription,
reasonActionVariableDescription,
tagsActionVariableDescription,
thresholdActionVariableDescription,
@ -124,6 +126,15 @@ export async function registerMetricInventoryThresholdRuleType(
{ name: 'orchestrator', description: orchestratorActionVariableDescription },
{ name: 'labels', description: labelsActionVariableDescription },
{ name: 'tags', description: tagsActionVariableDescription },
{ name: 'originalAlertState', description: originalAlertStateActionVariableDescription },
{
name: 'originalAlertStateWasALERT',
description: originalAlertStateWasActionVariableDescription,
},
{
name: 'originalAlertStateWasWARNING',
description: originalAlertStateWasActionVariableDescription,
},
],
},
getSummarizedAlerts: libs.metricsRules.createGetSummarizedAlerts(),

View file

@ -6,7 +6,7 @@
*/
import { i18n } from '@kbn/i18n';
import { ALERT_REASON } from '@kbn/rule-data-utils';
import { ALERT_ACTION_GROUP, ALERT_REASON } from '@kbn/rule-data-utils';
import { isEqual } from 'lodash';
import {
ActionGroupIdsOf,
@ -16,6 +16,7 @@ import {
} from '@kbn/alerting-plugin/common';
import { Alert, RuleTypeState } from '@kbn/alerting-plugin/server';
import { TimeUnitChar } from '@kbn/observability-plugin/common/utils/formatters/duration';
import { getOriginalActionGroup } from '../../../utils/get_original_action_group';
import { AlertStates, Comparator } from '../../../../common/alerting/metrics';
import { createFormatter } from '../../../../common/formatters';
import { InfraBackendLibs } from '../../infra_types';
@ -53,8 +54,18 @@ export type MetricThresholdRuleTypeState = RuleTypeState & {
export type MetricThresholdAlertState = AlertState; // no specific instace state used
export type MetricThresholdAlertContext = AlertContext; // no specific instace state used
// Action group ids for the Metric Threshold rule, declared as plain string
// constants so that their literal types can be combined into the union below.
export const FIRED_ACTIONS_ID = 'metrics.threshold.fired';
export const WARNING_ACTIONS_ID = 'metrics.threshold.warning';
export const NO_DATA_ACTIONS_ID = 'metrics.threshold.nodata';
// Union of the action group ids that the alert factory accepts as
// `actionGroup`: the three ids above plus `RecoveredActionGroup.id`.
type MetricThresholdActionGroup =
| typeof FIRED_ACTIONS_ID
| typeof WARNING_ACTIONS_ID
| typeof NO_DATA_ACTIONS_ID
| typeof RecoveredActionGroup.id;
type MetricThresholdAllowedActionGroups = ActionGroupIdsOf<
typeof FIRED_ACTIONS | typeof WARNING_ACTIONS
typeof FIRED_ACTIONS | typeof WARNING_ACTIONS | typeof NO_DATA_ACTIONS
>;
type MetricThresholdAlert = Alert<
@ -66,6 +77,7 @@ type MetricThresholdAlert = Alert<
type MetricThresholdAlertFactory = (
id: string,
reason: string,
actionGroup: MetricThresholdActionGroup,
additionalContext?: AdditionalContext | null,
threshold?: number | undefined,
value?: number | undefined
@ -101,11 +113,17 @@ export const createMetricThresholdExecutor = (libs: InfraBackendLibs) =>
const { alertWithLifecycle, savedObjectsClient, getAlertUuid, getAlertByAlertUuid } = services;
const alertFactory: MetricThresholdAlertFactory = (id, reason, additionalContext) =>
const alertFactory: MetricThresholdAlertFactory = (
id,
reason,
actionGroup,
additionalContext
) =>
alertWithLifecycle({
id,
fields: {
[ALERT_REASON]: reason,
[ALERT_ACTION_GROUP]: actionGroup,
...flattenAdditionalContext(additionalContext),
},
});
@ -127,9 +145,9 @@ export const createMetricThresholdExecutor = (libs: InfraBackendLibs) =>
} catch (e) {
logger.error(e.message);
const timestamp = startedAt.toISOString();
const actionGroupId = FIRED_ACTIONS.id; // Change this to an Error action group when able
const actionGroupId = FIRED_ACTIONS_ID; // Change this to an Error action group when able
const reason = buildInvalidQueryAlertReason(params.filterQueryText);
const alert = alertFactory(UNGROUPED_FACTORY_KEY, reason);
const alert = alertFactory(UNGROUPED_FACTORY_KEY, reason, actionGroupId);
const alertUuid = getAlertUuid(UNGROUPED_FACTORY_KEY);
alert.scheduleActions(actionGroupId, {
@ -258,14 +276,14 @@ export const createMetricThresholdExecutor = (libs: InfraBackendLibs) =>
if (reason) {
const timestamp = startedAt.toISOString();
const actionGroupId =
const actionGroupId: MetricThresholdActionGroup =
nextState === AlertStates.OK
? RecoveredActionGroup.id
: nextState === AlertStates.NO_DATA
? NO_DATA_ACTIONS.id
? NO_DATA_ACTIONS_ID
: nextState === AlertStates.WARNING
? WARNING_ACTIONS.id
: FIRED_ACTIONS.id;
? WARNING_ACTIONS_ID
: FIRED_ACTIONS_ID;
const additionalContext = hasAdditionalContext(params.groupBy, validGroupByForContext)
? alertResults && alertResults.length > 0
@ -273,7 +291,7 @@ export const createMetricThresholdExecutor = (libs: InfraBackendLibs) =>
: null
: null;
const alert = alertFactory(`${group}`, reason, additionalContext);
const alert = alertFactory(`${group}`, reason, actionGroupId, additionalContext);
const alertUuid = getAlertUuid(group);
scheduledActionsCount++;
@ -313,6 +331,7 @@ export const createMetricThresholdExecutor = (libs: InfraBackendLibs) =>
const alertHits = alertUuid ? await getAlertByAlertUuid(alertUuid) : undefined;
const additionalContext = getContextForRecoveredAlerts(alertHits);
const originalActionGroup = getOriginalActionGroup(alertHits);
alert.setContext({
alertDetailsUrl: getAlertDetailsUrl(libs.basePath, spaceId, alertUuid),
@ -323,6 +342,12 @@ export const createMetricThresholdExecutor = (libs: InfraBackendLibs) =>
timestamp: startedAt.toISOString(),
threshold: mapToConditionsLookup(criteria, (c) => c.threshold),
viewInAppUrl: getViewInMetricsAppUrl(libs.basePath, spaceId),
originalAlertState: translateActionGroupToAlertState(originalActionGroup),
originalAlertStateWasALERT: originalActionGroup === FIRED_ACTIONS.id,
originalAlertStateWasWARNING: originalActionGroup === WARNING_ACTIONS.id,
// eslint-disable-next-line @typescript-eslint/naming-convention
originalAlertStateWasNO_DATA: originalActionGroup === NO_DATA_ACTIONS.id,
...additionalContext,
});
}
@ -360,6 +385,20 @@ export const NO_DATA_ACTIONS = {
}),
};
/**
 * Maps an action group id to the corresponding entry of `stateToAlertMessage`.
 *
 * @param actionGroupId - Action group id read back from the alert document, if any.
 * @returns The message for the ALERT, WARNING or NO_DATA state when the id
 *   matches the fired, warning or no-data action group; `undefined` for any
 *   other value.
 */
const translateActionGroupToAlertState = (
  actionGroupId: string | undefined
): string | undefined => {
  switch (actionGroupId) {
    case FIRED_ACTIONS.id:
      return stateToAlertMessage[AlertStates.ALERT];
    case WARNING_ACTIONS.id:
      return stateToAlertMessage[AlertStates.WARNING];
    case NO_DATA_ACTIONS.id:
      return stateToAlertMessage[AlertStates.NO_DATA];
    default:
      // Unknown or missing action group: no state can be reported.
      return undefined;
  }
};
const mapToConditionsLookup = (
list: any[],
mapFn: (value: any, index: number, array: any[]) => unknown

View file

@ -23,6 +23,8 @@ import {
labelsActionVariableDescription,
metricActionVariableDescription,
orchestratorActionVariableDescription,
originalAlertStateActionVariableDescription,
originalAlertStateWasActionVariableDescription,
reasonActionVariableDescription,
tagsActionVariableDescription,
thresholdActionVariableDescription,
@ -124,6 +126,19 @@ export async function registerMetricThresholdRuleType(
{ name: 'orchestrator', description: orchestratorActionVariableDescription },
{ name: 'labels', description: labelsActionVariableDescription },
{ name: 'tags', description: tagsActionVariableDescription },
{ name: 'originalAlertState', description: originalAlertStateActionVariableDescription },
{
name: 'originalAlertStateWasALERT',
description: originalAlertStateWasActionVariableDescription,
},
{
name: 'originalAlertStateWasWARNING',
description: originalAlertStateWasActionVariableDescription,
},
{
name: 'originalAlertStateWasNO_DATA',
description: originalAlertStateWasActionVariableDescription,
},
],
},
producer: 'infrastructure',

View file

@ -0,0 +1,15 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { ALERT_ACTION_GROUP } from '@kbn/rule-data-utils';
/**
 * Reads the `ALERT_ACTION_GROUP` field from the `_source` of the first alert hit.
 *
 * @param alertHits - Hits returned for an alert lookup; may be null, undefined or empty.
 * @returns The stored action group value, or `undefined` when there are no
 *   hits or the first hit has no `_source`.
 */
export const getOriginalActionGroup = (
  alertHits: Array<{ [id: string]: any }> | null | undefined
) => {
  const hasHits = Boolean(alertHits) && alertHits!.length > 0;
  if (!hasHits) {
    return undefined;
  }
  // Only the first hit is consulted.
  const firstHitSource = alertHits![0]._source;
  return firstHitSource?.[ALERT_ACTION_GROUP];
};

View file

@ -75,7 +75,7 @@ export interface LifecycleAlertServices<
alertWithLifecycle: LifecycleAlertService<InstanceState, InstanceContext, ActionGroupIds>;
getAlertStartedDate: (alertInstanceId: string) => string | null;
getAlertUuid: (alertInstanceId: string) => string;
getAlertByAlertUuid: (alertUuid: string) => { [x: string]: any } | null;
getAlertByAlertUuid: (alertUuid: string) => Promise<Array<{ [id: string]: any }> | null>;
}
export type LifecycleRuleExecutor<

View file

@ -37,5 +37,5 @@ export const createLifecycleAlertServicesMock = <
alertWithLifecycle: ({ id }) => alertServices.alertFactory.create(id),
getAlertStartedDate: jest.fn((id: string) => null),
getAlertUuid: jest.fn((id: string) => 'mock-alert-uuid'),
getAlertByAlertUuid: jest.fn((id: string) => null),
getAlertByAlertUuid: jest.fn((id: string) => Promise.resolve(null)),
});