Mirror of https://github.com/elastic/kibana.git, synced 2025-04-23 17:28:26 -04:00
[OBX-UX-MGTM][ALERTING] Add the reason message to the rules recovery context (#211411)
## Summary

It fixes #184803 by:

### Adding the reason message to recovery context variables in the following rules:
- Inventory Threshold
- Metric threshold
- Custom threshold
- Log threshold

### Enabling recovery context and handling the recovery alert context for APM (except Anomaly)
- Latency threshold
- Error count
- Failed transaction rate
Parent: 3b3bbb1a85
Commit: 9a6b4ecda3

20 changed files with 572 additions and 11 deletions
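Every rule executor touched in this diff follows the same recovery pattern: read the recovered alerts from the alerts client, copy the stored `kibana.alert.reason` (and related fields) from the alert document into the recovery context, and hand it back through `setAlertData` so that `{{context.reason}}` resolves in the default recovery message. The sketch below illustrates that pattern only; the `RecoveredAlertLike`/`AlertsClientLike` interfaces are simplified stand-ins for the alerting framework types, not the real API surface.

```ts
// Minimal sketch of the recovery-context pattern used by the executors in this PR.
// The interfaces are simplified stand-ins; the real rule types also set
// `doesSetRecoveryContext: true` when registering (see the hunks below).
const ALERT_REASON = 'kibana.alert.reason'; // mirrors the constant exported by '@kbn/rule-data-utils'
const ALERT_EVALUATION_VALUE = 'kibana.alert.evaluation.value';

interface RecoveredAlertLike {
  alert: { getId(): string; getUuid(): string };
  hit?: Record<string, unknown>;
}

interface AlertsClientLike {
  getRecoveredAlerts(): RecoveredAlertLike[] | undefined;
  setAlertData(args: { id: string; context: Record<string, unknown> }): void;
}

// Copy fields from the recovered alert document into the recovery action context,
// so recovery messages can reference `{{context.reason}}` and `{{context.triggerValue}}`.
export function setRecoveredAlertsContext(alertsClient: AlertsClientLike) {
  const recoveredAlerts = alertsClient.getRecoveredAlerts() ?? [];
  for (const recoveredAlert of recoveredAlerts) {
    const hit = recoveredAlert.hit;
    alertsClient.setAlertData({
      id: recoveredAlert.alert.getId(),
      context: {
        reason: hit?.[ALERT_REASON],
        triggerValue: hit?.[ALERT_EVALUATION_VALUE],
      },
    });
  }
}
```

The real executors below additionally build view-in-app URLs, group-by action variables, and formatted trigger values before calling `setAlertData`.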
@@ -307,3 +307,7 @@ export const ANOMALY_DETECTOR_SELECTOR_OPTIONS = [
// Client side registrations:
// x-pack/solutions/observability/plugins/apm/public/components/alerting/<alert>/index.tsx
// x-pack/solutions/observability/plugins/apm/public/components/alerting/register_apm_alerts

export interface AdditionalContext {
  [x: string]: any;
}

@@ -26,7 +26,7 @@ export const errorCountMessage = i18n.translate(
export const errorCountRecoveryMessage = i18n.translate(
  'xpack.apm.alertTypes.errorCount.defaultRecoveryMessage',
  {
-    defaultMessage: `'{{context.reason}}'
+    defaultMessage: `Recovered: '{{context.reason}}'

'{{rule.name}}' has recovered.

@@ -61,7 +61,7 @@ export const transactionDurationMessage = i18n.translate(
export const transactionDurationRecoveryMessage = i18n.translate(
  'xpack.apm.alertTypes.transactionDuration.defaultRecoveryMessage',
  {
-    defaultMessage: `'{{context.reason}}'
+    defaultMessage: `Recovered: '{{context.reason}}'

'{{rule.name}}' has recovered.

@@ -97,7 +97,7 @@ export const transactionErrorRateMessage = i18n.translate(
export const transactionErrorRateRecoveryMessage = i18n.translate(
  'xpack.apm.alertTypes.transactionErrorRate.defaultRecoveryMessage',
  {
-    defaultMessage: `'{{context.reason}}'
+    defaultMessage: `Recovered: '{{context.reason}}'

'{{rule.name}}' has recovered.

@@ -132,7 +132,7 @@ export const anomalyMessage = i18n.translate(
export const anomalyRecoveryMessage = i18n.translate(
  'xpack.apm.alertTypes.transactionDurationAnomaly.defaultRecoveryMessage',
  {
-    defaultMessage: `'{{context.reason}}'
+    defaultMessage: `Recovered: '{{context.reason}}'

'{{rule.name}}' has recovered.

@@ -13,8 +13,11 @@ import { getAlertUrlErrorCount, getAlertUrlTransaction } from '../../../../commo
import {
  anomalyMessage,
  errorCountMessage,
  errorCountRecoveryMessage,
  transactionDurationMessage,
  transactionDurationRecoveryMessage,
  transactionErrorRateMessage,
  transactionErrorRateRecoveryMessage,
} from '../../../../common/rules/default_action_message';
import type { AlertParams } from './anomaly_rule_type';

@@ -49,6 +52,7 @@ export function registerApmRuleTypes(observabilityRuleTypeRegistry: Observabilit
    }),
    requiresAppContext: false,
    defaultActionMessage: errorCountMessage,
    defaultRecoveryMessage: errorCountRecoveryMessage,
    priority: 80,
  });

@@ -80,6 +84,7 @@ export function registerApmRuleTypes(observabilityRuleTypeRegistry: Observabilit
    alertDetailsAppSection: lazy(() => import('../ui_components/alert_details_app_section')),
    requiresAppContext: false,
    defaultActionMessage: transactionDurationMessage,
    defaultRecoveryMessage: transactionDurationRecoveryMessage,
    priority: 60,
  });

@@ -108,6 +113,7 @@ export function registerApmRuleTypes(observabilityRuleTypeRegistry: Observabilit
    }),
    requiresAppContext: false,
    defaultActionMessage: transactionErrorRateMessage,
    defaultRecoveryMessage: transactionErrorRateRecoveryMessage,
    priority: 70,
  });

@@ -1025,4 +1025,113 @@ describe('Error count alert', () => {
      },
    });
  });
  it('sends recovered alerts with their context', async () => {
    const { services, dependencies, executor } = createRuleTypeMocks();

    registerErrorCountRuleType(dependencies);

    const params = {
      threshold: 2,
      windowSize: 5,
      windowUnit: 'm',
    };

    services.scopedClusterClient.asCurrentUser.search.mockResponse({
      hits: {
        hits: [],
        total: {
          relation: 'eq',
          value: 1,
        },
      },
      aggregations: {
        error_counts: {
          buckets: [],
        },
      },
      took: 0,
      timed_out: false,
      _shards: {
        failed: 0,
        skipped: 0,
        successful: 1,
        total: 1,
      },
    });
    services.alertsClient.getRecoveredAlerts.mockReturnValue([
      {
        alert: {
          getId: jest.fn().mockReturnValue('test-id'),
          getUuid: jest.fn().mockReturnValue('test-uuid'),
          scheduledExecutionOptions: undefined,
          meta: [],
          state: [],
          context: {},
          id: 'synthtrace-high-cardinality-0_Synthtrace: many_errors',
          alertAsData: undefined,
        },
        hit: {
          'processor.event': 'error',
          'kibana.alert.evaluation.value': 60568922,
          'kibana.alert.evaluation.threshold': 24999998,
          'kibana.alert.reason':
            'Error count is 60568922 in the last 5 days for service: synthtrace-high-cardinality-0, env: Synthtrace: many_errors. Alert when > 24999998.',
          'agent.name': 'java',
          'service.environment': 'Synthtrace: many_errors',
          'service.name': 'synthtrace-high-cardinality-0',
          'kibana.alert.rule.category': 'Error count threshold',
          'kibana.alert.rule.consumer': 'alerts',
          'kibana.alert.rule.execution.uuid': '8ecb0754-1220-4b6b-b95d-87b3594e925a',
          'kibana.alert.rule.name': 'Error count threshold rule',
          'kibana.alert.rule.parameters': [],
          'kibana.alert.rule.producer': 'apm',
          'kibana.alert.rule.revision': 8,
          'kibana.alert.rule.rule_type_id': 'apm.error_rate',
          'kibana.alert.rule.tags': [],
          'kibana.alert.rule.uuid': '63028cf5-c059-4a6b-b375-fd9007233223',
          'kibana.space_ids': [],
          '@timestamp': '2025-02-20T12:11:51.960Z',
          'event.action': 'active',
          'event.kind': 'signal',
          'kibana.alert.rule.execution.timestamp': '2025-02-20T12:11:51.960Z',
          'kibana.alert.action_group': 'threshold_met',
          'kibana.alert.flapping': true,
          'kibana.alert.flapping_history': [],
          'kibana.alert.instance.id': 'synthtrace-high-cardinality-0_Synthtrace: many_errors',
          'kibana.alert.maintenance_window_ids': [],
          'kibana.alert.consecutive_matches': 2,
          'kibana.alert.status': 'active',
          'kibana.alert.uuid': '81617b97-02d2-413a-9f64-77161de80df4',
          'kibana.alert.workflow_status': 'open',
          'kibana.alert.duration.us': 12012000,
          'kibana.alert.start': '2025-02-20T12:11:39.948Z',
          'kibana.alert.time_range': [],
          'kibana.version': '9.1.0',
          tags: [],
          'kibana.alert.previous_action_group': 'threshold_met',
        },
      },
    ]);

    await executor({ params });

    expect(services.alertsClient.setAlertData).toHaveBeenCalledTimes(1);

    expect(services.alertsClient.setAlertData).toHaveBeenCalledWith({
      context: {
        alertDetailsUrl: 'http://localhost:5601/eyr/app/observability/alerts/test-uuid',
        environment: 'Synthtrace: many_errors',
        errorGroupingKey: undefined,
        interval: '5 mins',
        reason:
          'Error count is 60568922 in the last 5 days for service: synthtrace-high-cardinality-0, env: Synthtrace: many_errors. Alert when > 24999998.',
        serviceName: 'synthtrace-high-cardinality-0',
        threshold: 2,
        triggerValue: 60568922,
        viewInAppUrl:
          'http://localhost:5601/eyr/app/apm/services/synthtrace-high-cardinality-0/errors?environment=Synthtrace%3A%20many_errors',
      },
      id: 'test-id',
    });
  });
});

@@ -43,6 +43,7 @@ import {
import type {
  THRESHOLD_MET_GROUP,
  ApmRuleParamsType,
  AdditionalContext,
} from '../../../../../common/rules/apm_rule_types';
import {
  APM_SERVER_FEATURE_ID,

@@ -105,6 +106,7 @@ export function registerErrorCountRuleType({
    actionGroups: ruleTypeConfig.actionGroups,
    defaultActionGroupId: ruleTypeConfig.defaultActionGroupId,
    validate: { params: errorCountParamsSchema },
    doesSetRecoveryContext: true,
    schemas: {
      params: {
        type: 'config-schema',

@@ -269,6 +271,53 @@ export function registerErrorCountRuleType({
          });
        }
      );
      // Handle recovered alerts context
      const recoveredAlerts = alertsClient.getRecoveredAlerts() ?? [];
      for (const recoveredAlert of recoveredAlerts) {
        const alertHits = recoveredAlert.hit as AdditionalContext;
        const recoveredAlertId = recoveredAlert.alert.getId();
        const alertUuid = recoveredAlert.alert.getUuid();
        const alertDetailsUrl = getAlertDetailsUrl(basePath, spaceId, alertUuid);
        const groupByFields: Record<string, string> = allGroupByFields.reduce(
          (acc, sourceField: string) => {
            if (alertHits?.[sourceField] !== undefined) {
              acc[sourceField] = alertHits[sourceField];
            }
            return acc;
          },
          {} as Record<string, string>
        );

        const relativeViewInAppUrl = getAlertUrlErrorCount(
          groupByFields[SERVICE_NAME],
          getEnvironmentEsField(groupByFields[SERVICE_ENVIRONMENT])?.[SERVICE_ENVIRONMENT]
        );
        const viewInAppUrl = addSpaceIdToPath(
          basePath.publicBaseUrl,
          spaceId,
          relativeViewInAppUrl
        );
        const groupByActionVariables = getGroupByActionVariables(groupByFields);
        const recoveredContext = {
          alertDetailsUrl,
          interval: formatDurationFromTimeUnitChar(
            ruleParams.windowSize,
            ruleParams.windowUnit as TimeUnitChar
          ),
          reason: alertHits?.[ALERT_REASON],
          // When group by doesn't include error.grouping_key, the context.error.grouping_key action variable will contain value of the Error Grouping Key filter
          errorGroupingKey: ruleParams.errorGroupingKey,
          threshold: ruleParams.threshold,
          triggerValue: alertHits?.[ALERT_EVALUATION_VALUE],
          viewInAppUrl,
          ...groupByActionVariables,
        };

        alertsClient.setAlertData({
          id: recoveredAlertId,
          context: recoveredContext,
        });
      }

      return { state: {} };
    },

@@ -411,4 +411,126 @@ describe('registerTransactionDurationRuleType', () => {
      },
    });
  });
  it('sends recovered alert with their context', async () => {
    const { services, dependencies, executor } = createRuleTypeMocks();

    registerTransactionDurationRuleType(dependencies);

    services.scopedClusterClient.asCurrentUser.search.mockResponse({
      hits: {
        hits: [],
        total: {
          relation: 'eq',
          value: 0,
        },
      },
      aggregations: {
        series: {
          buckets: [],
        },
      },
      took: 0,
      timed_out: false,
      _shards: {
        failed: 0,
        skipped: 0,
        successful: 1,
        total: 1,
      },
    });

    services.alertsClient.getRecoveredAlerts.mockReturnValue([
      {
        alert: {
          getId: jest.fn().mockReturnValue('test-id'),
          getUuid: jest.fn().mockReturnValue('test-uuid'),
          scheduledExecutionOptions: undefined,
          meta: {},
          state: {},
          context: {},
          id: 'synthtrace-high-cardinality-0_Synthtrace: many_errors_request',
          alertAsData: undefined,
        },
        hit: {
          'processor.event': 'transaction',
          'kibana.alert.evaluation.value': 1000000,
          'kibana.alert.evaluation.threshold': 149000,
          'kibana.alert.reason':
            'Avg. latency is 1,000 ms in the last 5 days for service: synthtrace-high-cardinality-0, env: Synthtrace: many_errors, type: request. Alert when > 149 ms.',
          'agent.name': 'java',
          labels: { custom_label: [] },
          'service.environment': 'Synthtrace: many_errors',
          'service.name': 'synthtrace-high-cardinality-0',
          'transaction.type': 'request',
          'kibana.alert.rule.category': 'Latency threshold',
          'kibana.alert.rule.consumer': 'alerts',
          'kibana.alert.rule.execution.uuid': '646b1ca4-5799-4b3f-b253-593941da2c2f',
          'kibana.alert.rule.name': 'Latency threshold rule',
          'kibana.alert.rule.parameters': {
            aggregationType: 'avg',
            threshold: 149,
            windowSize: 5,
            windowUnit: 'd',
            environment: 'ENVIRONMENT_ALL',
          },
          'kibana.alert.rule.producer': 'apm',
          'kibana.alert.rule.revision': 15,
          'kibana.alert.rule.rule_type_id': 'apm.transaction_duration',
          'kibana.alert.rule.tags': [],
          'kibana.alert.rule.uuid': '9c4a8e4f-b55c-426c-b4cc-fd2c9cb8bf89',
          'kibana.space_ids': ['default'],
          '@timestamp': '2025-02-20T12:40:40.956Z',
          'event.action': 'open',
          'event.kind': 'signal',
          'kibana.alert.rule.execution.timestamp': '2025-02-20T12:40:40.956Z',
          'kibana.alert.action_group': 'threshold_met',
          'kibana.alert.flapping': false,
          'kibana.alert.flapping_history': [true],
          'kibana.alert.instance.id':
            'synthtrace-high-cardinality-0_Synthtrace: many_errors_request',
          'kibana.alert.maintenance_window_ids': [],
          'kibana.alert.consecutive_matches': 1,
          'kibana.alert.status': 'active',
          'kibana.alert.uuid': 'b60476e6-f4e3-47a1-ac1a-a53616411b66',
          'kibana.alert.severity_improving': false,
          'kibana.alert.workflow_status': 'open',
          'kibana.alert.duration.us': 0,
          'kibana.alert.start': '2025-02-20T12:40:40.956Z',
          'kibana.alert.time_range': { gte: '2025-02-20T12:40:40.956Z' },
          'kibana.version': '9.1.0',
          tags: [],
        },
      },
    ]);
    services.alertsClient.report.mockReturnValue({ uuid: 'test-uuid' });

    const params = {
      threshold: 3000,
      windowSize: 5,
      windowUnit: 'm',
      transactionType: 'request',
      serviceName: 'opbeans-java',
      aggregationType: 'avg',
      transactionName: 'GET /orders',
    };
    await executor({ params });
    expect(services.alertsClient.setAlertData).toHaveBeenCalledTimes(1);
    expect(services.alertsClient.setAlertData).toHaveBeenCalledWith({
      context: {
        alertDetailsUrl: 'http://localhost:5601/eyr/app/observability/alerts/test-uuid',
        environment: 'Synthtrace: many_errors',
        interval: '5 mins',
        reason:
          'Avg. latency is 1,000 ms in the last 5 days for service: synthtrace-high-cardinality-0, env: Synthtrace: many_errors, type: request. Alert when > 149 ms.',
        serviceName: 'synthtrace-high-cardinality-0',
        threshold: 3000,
        transactionName: 'GET /orders',
        transactionType: 'request',
        triggerValue: '1,000 ms',
        viewInAppUrl:
          'http://localhost:5601/eyr/app/apm/services/synthtrace-high-cardinality-0?transactionType=request&environment=Synthtrace%3A%20many_errors',
      },
      id: 'test-id',
    });
  });
});

@@ -47,6 +47,7 @@ import {
import type {
  THRESHOLD_MET_GROUP,
  ApmRuleParamsType,
  AdditionalContext,
} from '../../../../../common/rules/apm_rule_types';
import {
  APM_SERVER_FEATURE_ID,

@@ -114,6 +115,7 @@ export function registerTransactionDurationRuleType({
    actionGroups: ruleTypeConfig.actionGroups,
    defaultActionGroupId: ruleTypeConfig.defaultActionGroupId,
    validate: { params: transactionDurationParamsSchema },
    doesSetRecoveryContext: true,
    schemas: {
      params: {
        type: 'config-schema',

@@ -233,7 +235,7 @@ export function registerTransactionDurationRuleType({

      for (const bucket of response.aggregations.series.buckets) {
        const groupByFields = bucket.key.reduce((obj, bucketKey, bucketIndex) => {
-          obj[allGroupByFields[bucketIndex]] = bucketKey;
+          obj[allGroupByFields[bucketIndex]] = bucketKey as string;
          return obj;
        }, {} as Record<string, string>);

@@ -322,6 +324,57 @@ export function registerTransactionDurationRuleType({
          context,
        });
      }
      // Handle recovered alerts context
      const recoveredAlerts = alertsClient.getRecoveredAlerts() ?? [];
      for (const recoveredAlert of recoveredAlerts) {
        const alertHits = recoveredAlert.hit as AdditionalContext;
        const recoveredAlertId = recoveredAlert.alert.getId();
        const alertUuid = recoveredAlert.alert.getUuid();
        const alertDetailsUrl = getAlertDetailsUrl(basePath, spaceId, alertUuid);
        const groupByFields: Record<string, string> = allGroupByFields.reduce(
          (acc, sourceField: string) => {
            if (alertHits?.[sourceField] !== undefined) {
              acc[sourceField] = alertHits[sourceField];
            }
            return acc;
          },
          {} as Record<string, string>
        );
        const viewInAppUrl = addSpaceIdToPath(
          basePath.publicBaseUrl,
          spaceId,
          getAlertUrlTransaction(
            groupByFields[SERVICE_NAME],
            getEnvironmentEsField(groupByFields[SERVICE_ENVIRONMENT])?.[SERVICE_ENVIRONMENT],
            groupByFields[TRANSACTION_TYPE]
          )
        );

        const durationFormatter = getDurationFormatter(alertHits?.[ALERT_EVALUATION_VALUE]);
        const transactionDurationFormatted = durationFormatter(
          alertHits?.[ALERT_EVALUATION_VALUE]
        ).formatted;
        const groupByActionVariables = getGroupByActionVariables(groupByFields);
        const recoveredContext = {
          alertDetailsUrl,
          interval: formatDurationFromTimeUnitChar(
            ruleParams.windowSize,
            ruleParams.windowUnit as TimeUnitChar
          ),
          reason: alertHits?.[ALERT_REASON],
          // When group by doesn't include transaction.name, the context.transaction.name action variable will contain value of the Transaction Name filter
          transactionName: ruleParams.transactionName,
          threshold: ruleParams.threshold,
          triggerValue: transactionDurationFormatted,
          viewInAppUrl,
          ...groupByActionVariables,
        };

        alertsClient.setAlertData({
          id: recoveredAlertId,
          context: recoveredContext,
        });
      }

      return { state: {} };
    },

@@ -596,4 +596,119 @@ describe('Transaction error rate alert', () => {
      },
    });
  });

  it('sends the recovered alerts with their context', async () => {
    const { services, dependencies, executor } = createRuleTypeMocks();

    registerTransactionErrorRateRuleType({
      ...dependencies,
    });
    services.scopedClusterClient.asCurrentUser.search.mockResponse({
      hits: {
        hits: [],
        total: {
          relation: 'eq',
          value: 1,
        },
      },
      aggregations: {
        series: {
          buckets: [],
        },
      },
      took: 0,
      timed_out: false,
      _shards: {
        failed: 0,
        skipped: 0,
        successful: 1,
        total: 1,
      },
    });
    services.alertsClient.getRecoveredAlerts.mockReturnValue([
      {
        alert: {
          getId: jest.fn().mockReturnValue('test-id'),
          getUuid: jest.fn().mockReturnValue('test-uuid'),
          scheduledExecutionOptions: undefined,
          meta: [],
          state: [],
          context: {},
          id: 'synthtrace-high-cardinality-0_Synthtrace: many_errors_request',
          alertAsData: undefined,
        },
        hit: {
          'processor.event': 'transaction',
          'kibana.alert.evaluation.value': 100,
          'kibana.alert.evaluation.threshold': 30,
          'kibana.alert.reason':
            'Failed transactions is 100% in the last 5 days for service: synthtrace-high-cardinality-0, env: Synthtrace: many_errors, type: request. Alert when > 30%.',
          'agent.name': 'java',
          labels: [],
          'service.environment': 'Synthtrace: many_errors',
          'service.name': 'synthtrace-high-cardinality-0',
          'transaction.type': 'request',
          'kibana.alert.rule.category': 'Failed transaction rate threshold',
          'kibana.alert.rule.consumer': 'alerts',
          'kibana.alert.rule.execution.uuid': '3cf39cc5-b538-492e-b45d-35b01b5f56c3',
          'kibana.alert.rule.name': 'Failed transaction rate threshold rule',
          'kibana.alert.rule.parameters': [],
          'kibana.alert.rule.producer': 'apm',
          'kibana.alert.rule.revision': 1,
          'kibana.alert.rule.rule_type_id': 'apm.transaction_error_rate',
          'kibana.alert.rule.tags': [],
          'kibana.alert.rule.uuid': '7afe1f67-4730-46ed-8cf3-9d0671eca409',
          'kibana.space_ids': [],
          '@timestamp': '2025-02-20T11:21:08.787Z',
          'event.action': 'active',
          'event.kind': 'signal',
          'kibana.alert.rule.execution.timestamp': '2025-02-20T11:21:08.787Z',
          'kibana.alert.action_group': 'threshold_met',
          'kibana.alert.flapping': false,
          'kibana.alert.flapping_history': [],
          'kibana.alert.instance.id':
            'synthtrace-high-cardinality-0_Synthtrace: many_errors_request',
          'kibana.alert.maintenance_window_ids': [],
          'kibana.alert.consecutive_matches': 2,
          'kibana.alert.status': 'active',
          'kibana.alert.uuid': 'a1c070ea-5bba-4bbb-8564-8f0e545ccb24',
          'kibana.alert.workflow_status': 'open',
          'kibana.alert.duration.us': 21018000,
          'kibana.alert.start': '2025-02-20T11:20:47.769Z',
          'kibana.alert.time_range': [],
          'kibana.version': '9.1.0',
          tags: [],
          'kibana.alert.previous_action_group': 'threshold_met',
        },
      },
    ]);

    const params = {
      threshold: 10,
      windowSize: 5,
      windowUnit: 'm',
    };

    await executor({ params });

    expect(services.alertsClient.setAlertData).toHaveBeenCalledTimes(1);

    expect(services.alertsClient.setAlertData).toHaveBeenCalledWith({
      context: {
        alertDetailsUrl: 'http://localhost:5601/eyr/app/observability/alerts/test-uuid',
        environment: 'Synthtrace: many_errors',
        interval: '5 mins',
        reason:
          'Failed transactions is 100% in the last 5 days for service: synthtrace-high-cardinality-0, env: Synthtrace: many_errors, type: request. Alert when > 30%.',
        serviceName: 'synthtrace-high-cardinality-0',
        threshold: 10,
        transactionName: undefined,
        transactionType: 'request',
        triggerValue: '100',
        viewInAppUrl:
          'http://localhost:5601/eyr/app/apm/services/synthtrace-high-cardinality-0?transactionType=request&environment=Synthtrace%3A%20many_errors',
      },
      id: 'test-id',
    });
  });
});

@@ -48,6 +48,7 @@ import { EventOutcome } from '../../../../../common/event_outcome';
import type {
  THRESHOLD_MET_GROUP,
  ApmRuleParamsType,
  AdditionalContext,
} from '../../../../../common/rules/apm_rule_types';
import {
  APM_SERVER_FEATURE_ID,

@@ -112,6 +113,7 @@ export function registerTransactionErrorRateRuleType({
    actionGroups: ruleTypeConfig.actionGroups,
    defaultActionGroupId: ruleTypeConfig.defaultActionGroupId,
    validate: { params: transactionErrorRateParamsSchema },
    doesSetRecoveryContext: true,
    schemas: {
      params: {
        type: 'config-schema',

@@ -318,6 +320,54 @@ export function registerTransactionErrorRateRuleType({
        });
      });

      // Handle recovered alerts context
      const recoveredAlerts = alertsClient.getRecoveredAlerts() ?? [];
      for (const recoveredAlert of recoveredAlerts) {
        const alertHits = recoveredAlert.hit as AdditionalContext;
        const recoveredAlertId = recoveredAlert.alert.getId();
        const alertUuid = recoveredAlert.alert.getUuid();
        const alertDetailsUrl = getAlertDetailsUrl(basePath, spaceId, alertUuid);
        const groupByFields: Record<string, string> = allGroupByFields.reduce(
          (acc, sourceField: string) => {
            if (alertHits?.[sourceField] !== undefined) {
              acc[sourceField] = alertHits[sourceField];
            }
            return acc;
          },
          {} as Record<string, string>
        );
        const viewInAppUrl = addSpaceIdToPath(
          basePath.publicBaseUrl,
          spaceId,
          getAlertUrlTransaction(
            groupByFields[SERVICE_NAME],
            getEnvironmentEsField(groupByFields[SERVICE_ENVIRONMENT])?.[SERVICE_ENVIRONMENT],
            groupByFields[TRANSACTION_TYPE]
          )
        );

        const groupByActionVariables = getGroupByActionVariables(groupByFields);
        const recoveredContext = {
          alertDetailsUrl,
          interval: formatDurationFromTimeUnitChar(
            ruleParams.windowSize,
            ruleParams.windowUnit as TimeUnitChar
          ),
          reason: alertHits?.[ALERT_REASON],
          // When group by doesn't include transaction.name, the context.transaction.name action variable will contain value of the Transaction Name filter
          transactionName: ruleParams.transactionName,
          threshold: ruleParams.threshold,
          triggerValue: asDecimalOrInteger(alertHits?.[ALERT_EVALUATION_VALUE]),
          viewInAppUrl,
          ...groupByActionVariables,
        };

        alertsClient.setAlertData({
          id: recoveredAlertId,
          context: recoveredContext,
        });
      }

      return { state: {} };
    },
    alerts: ApmRuleTypeAlertDefinition,

@@ -52,6 +52,7 @@ export const createRuleTypeMocks = () => {
    alertsClient: {
      report: jest.fn(),
      setAlertData: jest.fn(),
      getRecoveredAlerts: jest.fn(),
    },
  };

@@ -42,7 +42,9 @@ const inventoryDefaultActionMessage = i18n.translate(
const inventoryDefaultRecoveryMessage = i18n.translate(
  'xpack.infra.metrics.alerting.inventory.threshold.defaultRecoveryMessage',
  {
-    defaultMessage: `'{{rule.name}}' has recovered.
+    defaultMessage: `Recovered '{{context.reason}}'

'{{rule.name}}' has recovered.

- Affected: '{{context.group}}'
- Metric: '{{context.metric}}'

@@ -35,7 +35,9 @@ const logThresholdDefaultActionMessage = i18n.translate(
const logThresholdDefaultRecoveryMessage = i18n.translate(
  'xpack.infra.logs.alerting.threshold.defaultRecoveryMessage',
  {
-    defaultMessage: `'{{rule.name}}' has recovered.
+    defaultMessage: `Recovered: '{{context.reason}}'

'{{rule.name}}' has recovered.

[View alert details]('{{context.alertDetailsUrl}}')
`,

@@ -42,7 +42,9 @@ const metricThresholdDefaultActionMessage = i18n.translate(
const metricThresholdDefaultRecoveryMessage = i18n.translate(
  'xpack.infra.metrics.alerting.metric.threshold.defaultRecoveryMessage',
  {
-    defaultMessage: `'{{rule.name}}' has recovered.
+    defaultMessage: `Recovered: '{{context.reason}}'

'{{rule.name}}' has recovered.

- Affected: '{{context.group}}'
- Metric: '{{context.metric}}'

@@ -350,6 +350,7 @@ export const createInventoryMetricThresholdExecutor =
          assetDetailsLocator,
          inventoryLocator,
        }),
        reason: alertHits?.[ALERT_REASON],
        originalAlertState: translateActionGroupToAlertState(originalActionGroup),
        originalAlertStateWasALERT: originalActionGroup === FIRED_ACTIONS_ID,
        originalAlertStateWasWARNING: originalActionGroup === WARNING_ACTIONS_ID,

@@ -920,6 +920,7 @@ const processRecoveredAlerts = ({
      groupByKeys: groupByKeysObjectForRecovered[recoveredAlertId],
      timestamp: startedAt.toISOString(),
      viewInAppUrl,
      reason: alertHits?.[ALERT_REASON],
      ...additionalContext,
    };

@@ -472,7 +472,7 @@ export const createMetricThresholdExecutor =
          metricsExplorerLocator,
          additionalContext,
        }),
        reason: alertHits?.[ALERT_REASON],
        originalAlertState: translateActionGroupToAlertState(originalActionGroup),
        originalAlertStateWasALERT: originalActionGroup === FIRED_ACTIONS.id,
        originalAlertStateWasWARNING: originalActionGroup === WARNING_ACTIONS.id,

@@ -44,7 +44,9 @@ const thresholdDefaultActionMessage = i18n.translate(
const thresholdDefaultRecoveryMessage = i18n.translate(
  'xpack.observability.customThreshold.rule.alerting.threshold.defaultRecoveryMessage',
  {
-    defaultMessage: `'{{rule.name}}' has recovered.
+    defaultMessage: `Recovered: '{{context.reason}}'

'{{rule.name}}' has recovered.

[View alert details]('{{context.alertDetailsUrl}}')
`,

@@ -1660,6 +1660,47 @@ describe('The custom threshold alert type', () => {
        },
      });
    });
    test('includes reason message in the recovered alert context pulled from the last active alert ', async () => {
      setEvaluationResults([{}]);
      const mockedSetContext = jest.fn();
      services.alertsClient.getRecoveredAlerts.mockImplementation((params: any) => {
        return [
          {
            alert: {
              meta: [],
              state: [],
              context: {},
              id: 'host-0',
              getId: jest.fn().mockReturnValue('host-0'),
              getUuid: jest.fn().mockReturnValue('mockedUuid'),
              getStart: jest.fn().mockReturnValue('2024-07-18T08:09:05.697Z'),
            },
            hit: {
              'host.name': 'host-0',
              'kibana.alert.reason': 'This is reason msg for the alert',
            },
          },
        ];
      });
      services.alertFactory.done.mockImplementation(() => {
        return {
          getRecoveredAlerts: jest.fn().mockReturnValue([
            {
              setContext: mockedSetContext,
              getId: jest.fn().mockReturnValue('mockedId'),
            },
          ]),
        };
      });
      await execute(COMPARATORS.GREATER_THAN, [0.9]);
      expect(services.alertsClient.setAlertData).toBeCalledWith(
        expect.objectContaining({
          context: expect.objectContaining({
            reason: 'This is reason msg for the alert',
          }),
        })
      );
    });
  });

  describe("querying a metric that hasn't reported data", () => {

@@ -324,6 +324,7 @@ export const createCustomThresholdExecutor = ({
        searchConfiguration: params.searchConfiguration,
        startedAt: indexedStartedAt,
      }),
      reason: alertHits?.[ALERT_REASON],
      ...additionalContext,
    };

@@ -32,7 +32,7 @@ const sloBurnRateDefaultActionMessage = i18n.translate(
const sloBurnRateDefaultRecoveryMessage = i18n.translate(
  'xpack.slo.rules.burnRate.defaultRecoveryMessage',
  {
-    defaultMessage: `'{{context.reason}}'
+    defaultMessage: `Recovered: '{{context.reason}}'

'{{rule.name}}' has recovered.