[RAC][APM] Add {{context.reason}} variable to the rule templating language. (#125196)

* Add reason message as an available rule variable

* Add alert reason to error threshold

* Add reason message as a rule variable to the other rule types

* Fix checks

* Fix lint

* Fix tests

* Fix tests and add param

* Fix tests

* Fix transaction test

* Add reason message for the transaction test
Faisal Kanout 2022-02-23 15:59:44 +03:00 committed by GitHub
parent eb9d8798c2
commit 573fdce59f
9 changed files with 67 additions and 36 deletions

@@ -56,4 +56,13 @@ export const apmActionVariables = {
),
name: 'interval' as const,
},
reason: {
description: i18n.translate(
'xpack.apm.alerts.action_variables.reasonMessage',
{
defaultMessage: 'A concise description of the reason for the alert',
}
),
name: 'reason' as const,
},
};
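
The `reason` entry above is what exposes the new variable to rule authors; in an action's message template it is referenced through the alerting templating syntax as {{context.reason}}, as named in the PR title. A minimal sketch of such a template, assuming a generic connector whose params carry a message string (the connector id and params shape here are illustrative, not taken from this commit):

// Illustrative only: a rule action whose message template interpolates the new
// context variable. Connector id and params shape are placeholders.
const action = {
  group: 'threshold_met',
  id: 'my-log-connector', // hypothetical connector
  params: {
    message:
      'APM alert for {{context.serviceName}} ({{context.environment}}): {{context.reason}}',
  },
};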

@@ -142,6 +142,7 @@ describe('Error count alert', () => {
environment: 'env-foo',
threshold: 2,
triggerValue: 5,
reason: 'Error count is 5 in the last 5 mins for foo. Alert when > 2.',
interval: '5m',
});
expect(scheduleActions).toHaveBeenCalledWith('threshold_met', {
@@ -149,11 +150,13 @@ describe('Error count alert', () => {
environment: 'env-foo-2',
threshold: 2,
triggerValue: 4,
reason: 'Error count is 4 in the last 5 mins for foo. Alert when > 2.',
interval: '5m',
});
expect(scheduleActions).toHaveBeenCalledWith('threshold_met', {
serviceName: 'bar',
environment: 'env-bar',
reason: 'Error count is 3 in the last 5 mins for bar. Alert when > 2.',
threshold: 2,
triggerValue: 3,
interval: '5m',

@@ -74,6 +74,7 @@ export function registerErrorCountAlertType({
apmActionVariables.threshold,
apmActionVariables.triggerValue,
apmActionVariables.interval,
apmActionVariables.reason,
],
},
producer: APM_SERVER_FEATURE_ID,
@@ -139,7 +140,13 @@ export function registerErrorCountAlertType({
.filter((result) => result.errorCount >= ruleParams.threshold)
.forEach((result) => {
const { serviceName, environment, errorCount } = result;
const alertReason = formatErrorCountReason({
serviceName,
threshold: ruleParams.threshold,
measured: errorCount,
windowSize: ruleParams.windowSize,
windowUnit: ruleParams.windowUnit,
});
services
.alertWithLifecycle({
id: [AlertType.ErrorCount, serviceName, environment]
@@ -151,13 +158,7 @@ export function registerErrorCountAlertType({
[PROCESSOR_EVENT]: ProcessorEvent.error,
[ALERT_EVALUATION_VALUE]: errorCount,
[ALERT_EVALUATION_THRESHOLD]: ruleParams.threshold,
[ALERT_REASON]: formatErrorCountReason({
serviceName,
threshold: ruleParams.threshold,
measured: errorCount,
windowSize: ruleParams.windowSize,
windowUnit: ruleParams.windowUnit,
}),
[ALERT_REASON]: alertReason,
},
})
.scheduleActions(alertTypeConfig.defaultActionGroupId, {
@@ -166,6 +167,7 @@ export function registerErrorCountAlertType({
threshold: ruleParams.threshold,
triggerValue: errorCount,
interval: `${ruleParams.windowSize}${ruleParams.windowUnit}`,
reason: alertReason,
});
});
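
The hunks above hoist the formatErrorCountReason call into an alertReason variable so the same string feeds both the ALERT_REASON field and the new reason context variable. The formatter itself is not part of this diff; as a rough sketch of the shape implied by the test expectations (the actual APM helper goes through i18n and lives elsewhere in the plugin), it might look like:

// Rough sketch inferred from the test strings above, e.g.
// "Error count is 5 in the last 5 mins for foo. Alert when > 2."
// Not the actual Kibana implementation, which is i18n-aware.
function formatErrorCountReasonSketch({
  serviceName,
  threshold,
  measured,
  windowSize,
  windowUnit,
}: {
  serviceName: string;
  threshold: number;
  measured: number;
  windowSize: number;
  windowUnit: string;
}): string {
  const window = `${windowSize} ${windowUnit === 'm' ? 'mins' : windowUnit}`;
  return `Error count is ${measured} in the last ${window} for ${serviceName}. Alert when > ${threshold}.`;
}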

@@ -44,6 +44,7 @@ describe('registerTransactionDurationAlertType', () => {
windowUnit: 'm',
transactionType: 'request',
serviceName: 'opbeans-java',
aggregationType: 'avg',
};
await executor({ params });
expect(scheduleActions).toHaveBeenCalledTimes(1);
@@ -54,6 +55,8 @@ describe('registerTransactionDurationAlertType', () => {
threshold: 3000000,
triggerValue: '5,500 ms',
interval: `5m`,
reason:
'Avg. latency is 5,500 ms in the last 5 mins for opbeans-java. Alert when > 3,000 ms.',
});
});
});

@@ -86,6 +86,7 @@ export function registerTransactionDurationAlertType({
apmActionVariables.threshold,
apmActionVariables.triggerValue,
apmActionVariables.interval,
apmActionVariables.reason,
],
},
producer: APM_SERVER_FEATURE_ID,
@@ -178,7 +179,15 @@ export function registerTransactionDurationAlertType({
const durationFormatter = getDurationFormatter(transactionDuration);
const transactionDurationFormatted =
durationFormatter(transactionDuration).formatted;
const reasonMessage = formatTransactionDurationReason({
measured: transactionDuration,
serviceName: ruleParams.serviceName,
threshold: thresholdMicroseconds,
asDuration,
aggregationType: String(ruleParams.aggregationType),
windowSize: ruleParams.windowSize,
windowUnit: ruleParams.windowUnit,
});
services
.alertWithLifecycle({
id: `${AlertType.TransactionDuration}_${getEnvironmentLabel(
@@ -191,15 +200,7 @@ export function registerTransactionDurationAlertType({
[PROCESSOR_EVENT]: ProcessorEvent.transaction,
[ALERT_EVALUATION_VALUE]: transactionDuration,
[ALERT_EVALUATION_THRESHOLD]: thresholdMicroseconds,
[ALERT_REASON]: formatTransactionDurationReason({
measured: transactionDuration,
serviceName: ruleParams.serviceName,
threshold: thresholdMicroseconds,
asDuration,
aggregationType: String(ruleParams.aggregationType),
windowSize: ruleParams.windowSize,
windowUnit: ruleParams.windowUnit,
}),
[ALERT_REASON]: reasonMessage,
},
})
.scheduleActions(alertTypeConfig.defaultActionGroupId, {
@@ -209,6 +210,7 @@ export function registerTransactionDurationAlertType({
threshold: thresholdMicroseconds,
triggerValue: transactionDurationFormatted,
interval: `${ruleParams.windowSize}${ruleParams.windowUnit}`,
reason: reasonMessage,
});
}
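
For the latency rule, the threshold and measured duration are in microseconds, while the reason and triggerValue embed the human-readable values seen in the test ('5,500 ms', '3,000 ms'). A simplified illustration of that microseconds-to-text step (the real code uses the APM getDurationFormatter/asDuration helpers, which handle more units; this stand-in only covers the millisecond case from the test):

// Simplified stand-in for the duration formatting used above; illustration only.
function asMillisecondsSketch(durationMicros: number): string {
  const ms = durationMicros / 1000;
  return `${ms.toLocaleString('en-US')} ms`;
}

asMillisecondsSketch(5500000); // '5,500 ms' -- the triggerValue in the test
asMillisecondsSketch(3000000); // '3,000 ms' -- the threshold rendered in the reason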

@@ -179,7 +179,11 @@ describe('Transaction duration anomaly alert', () => {
ml,
});
const params = { anomalySeverityType: ANOMALY_SEVERITY.MINOR };
const params = {
anomalySeverityType: ANOMALY_SEVERITY.MINOR,
windowSize: 5,
windowUnit: 'm',
};
await executor({ params });
@@ -195,6 +199,8 @@ describe('Transaction duration anomaly alert', () => {
environment: 'development',
threshold: 'minor',
triggerValue: 'critical',
reason:
'critical anomaly with a score of 80 was detected in the last 5 mins for foo.',
});
});
});

@@ -85,6 +85,7 @@ export function registerTransactionDurationAnomalyAlertType({
apmActionVariables.environment,
apmActionVariables.threshold,
apmActionVariables.triggerValue,
apmActionVariables.reason,
],
},
producer: 'apm',
@@ -210,7 +211,13 @@ export function registerTransactionDurationAnomalyAlertType({
compact(anomalies).forEach((anomaly) => {
const { serviceName, environment, transactionType, score } = anomaly;
const severityLevel = getSeverity(score);
const reasonMessage = formatTransactionDurationAnomalyReason({
measured: score,
serviceName,
severityLevel,
windowSize: params.windowSize,
windowUnit: params.windowUnit,
});
services
.alertWithLifecycle({
id: [
@@ -229,13 +236,7 @@ export function registerTransactionDurationAnomalyAlertType({
[ALERT_SEVERITY]: severityLevel,
[ALERT_EVALUATION_VALUE]: score,
[ALERT_EVALUATION_THRESHOLD]: threshold,
[ALERT_REASON]: formatTransactionDurationAnomalyReason({
measured: score,
serviceName,
severityLevel,
windowSize: params.windowSize,
windowUnit: params.windowUnit,
}),
[ALERT_REASON]: reasonMessage,
},
})
.scheduleActions(alertTypeConfig.defaultActionGroupId, {
@@ -244,6 +245,7 @@ export function registerTransactionDurationAnomalyAlertType({
environment: getEnvironmentLabel(environment),
threshold: selectedOption?.label,
triggerValue: severityLevel,
reason: reasonMessage,
});
});
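
For the anomaly rule, the reason embeds a severity label derived from the anomaly score via getSeverity (score 80 maps to 'critical' in the test). A sketch assuming the usual ML anomaly severity bands; the exact thresholds are an assumption, not part of this commit:

// Assumed severity bands (critical >= 75, major >= 50, minor >= 25, warning >= 3).
// Consistent with the test's score of 80 -> 'critical', but not taken from this diff.
function getSeverityLabelSketch(score: number): string {
  if (score >= 75) return 'critical';
  if (score >= 50) return 'major';
  if (score >= 25) return 'minor';
  if (score >= 3) return 'warning';
  return 'low';
}

// "critical anomaly with a score of 80 was detected in the last 5 mins for foo."
const anomalyReason = `${getSeverityLabelSketch(80)} anomaly with a score of 80 was detected in the last 5 mins for foo.`;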

@@ -124,6 +124,8 @@ describe('Transaction error rate alert', () => {
serviceName: 'foo',
transactionType: 'type-foo',
environment: 'env-foo',
reason:
'Failed transactions is 10% in the last 5 mins for foo. Alert when > 10%.',
threshold: 10,
triggerValue: '10',
interval: '5m',

@@ -83,6 +83,7 @@ export function registerTransactionErrorRateAlertType({
apmActionVariables.threshold,
apmActionVariables.triggerValue,
apmActionVariables.interval,
apmActionVariables.reason,
],
},
producer: APM_SERVER_FEATURE_ID,
@@ -198,7 +199,14 @@ export function registerTransactionErrorRateAlertType({
results.forEach((result) => {
const { serviceName, environment, transactionType, errorRate } =
result;
const reasonMessage = formatTransactionErrorRateReason({
threshold: ruleParams.threshold,
measured: errorRate,
asPercent,
serviceName,
windowSize: ruleParams.windowSize,
windowUnit: ruleParams.windowUnit,
});
services
.alertWithLifecycle({
id: [
@@ -216,14 +224,7 @@ export function registerTransactionErrorRateAlertType({
[PROCESSOR_EVENT]: ProcessorEvent.transaction,
[ALERT_EVALUATION_VALUE]: errorRate,
[ALERT_EVALUATION_THRESHOLD]: ruleParams.threshold,
[ALERT_REASON]: formatTransactionErrorRateReason({
threshold: ruleParams.threshold,
measured: errorRate,
asPercent,
serviceName,
windowSize: ruleParams.windowSize,
windowUnit: ruleParams.windowUnit,
}),
[ALERT_REASON]: reasonMessage,
},
})
.scheduleActions(alertTypeConfig.defaultActionGroupId, {
@@ -233,6 +234,7 @@ export function registerTransactionErrorRateAlertType({
threshold: ruleParams.threshold,
triggerValue: asDecimalOrInteger(errorRate),
interval: `${ruleParams.windowSize}${ruleParams.windowUnit}`,
reason: reasonMessage,
});
});
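
Note that the error-rate test expects the reason to render the rate as a percentage ('10%') while the triggerValue context variable stays a bare number ('10', via asDecimalOrInteger). A minimal sketch of that distinction, with simplified stand-ins for the asPercent and asDecimalOrInteger formatters used above:

// Simplified stand-ins, only to show why the reason reads "...10%..." while
// triggerValue is '10'; the real APM formatters have richer signatures.
const asPercentSketch = (value: number) => `${value}%`;
const asDecimalOrIntegerSketch = (value: number) =>
  Number.isInteger(value) ? String(value) : value.toFixed(1);

const errorRate = 10;
const errorRateReason = `Failed transactions is ${asPercentSketch(errorRate)} in the last 5 mins for foo. Alert when > ${asPercentSketch(10)}.`;
const triggerValue = asDecimalOrIntegerSketch(errorRate); // '10'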