Onboard Latency Threshold rule type with FAAD (#179080)

Towards: https://github.com/elastic/kibana/issues/169867

This PR onboards the Latency Threshold rule type with FAAD (framework alerts-as-data).

### To verify

1. Run the following script to generate APM data:
```
node scripts/synthtrace simple_trace.ts --local --live
```

2. Create a latency threshold rule.
Example:
```
POST kbn:/api/alerting/rule
{
  "params": {
    "aggregationType": "avg",
    "environment": "ENVIRONMENT_ALL",
    "threshold": 400,
    "windowSize": 5,
    "windowUnit": "m"
  },
  "consumer": "alerts",
  "schedule": {
    "interval": "1m"
  },
  "tags": [],
  "name": "testinggg",
  "rule_type_id": "apm.transaction_duration",
  "notify_when": "onActionGroupChange",
  "actions": []
}
```
3. Your rule should create an alert and save it in
`.internal.alerts-observability.apm.alerts-default-000001`
Example:
```
GET .internal.alerts-*/_search
```
4. Update the rule to set `threshold: 10000`

5. The alert should recover, and the alert document in the above index should
be updated to `kibana.alert.status: recovered`.
This commit is contained in:
Alexi Doak 2024-03-26 08:48:59 -07:00 committed by GitHub
parent 8eb2fbd805
commit a936bf755f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 240 additions and 128 deletions

View file

@ -38,7 +38,7 @@ export enum AggregationType {
export const THRESHOLD_MET_GROUP_ID = 'threshold_met';
export type ThresholdMetActionGroupId = typeof THRESHOLD_MET_GROUP_ID;
const THRESHOLD_MET_GROUP: ActionGroup<ThresholdMetActionGroupId> = {
export const THRESHOLD_MET_GROUP: ActionGroup<ThresholdMetActionGroupId> = {
id: THRESHOLD_MET_GROUP_ID,
name: i18n.translate('xpack.apm.a.thresholdMet', {
defaultMessage: 'Threshold met',

View file

@ -90,6 +90,7 @@ export const ApmRuleTypeAlertDefinition: IRuleTypeAlerts = {
context: APM_RULE_TYPE_ALERT_CONTEXT,
mappings: { fieldMap: apmRuleTypeAlertFieldMap },
useLegacyAlerts: true,
shouldWrite: false,
};
export interface RegisterRuleDependencies {

View file

@ -10,8 +10,7 @@ import { createRuleTypeMocks } from '../../test_utils';
describe('registerTransactionDurationRuleType', () => {
it('sends alert when value is greater than threshold', async () => {
const { services, dependencies, executor, scheduleActions } =
createRuleTypeMocks();
const { services, dependencies, executor } = createRuleTypeMocks();
registerTransactionDurationRuleType(dependencies);
@ -44,6 +43,7 @@ describe('registerTransactionDurationRuleType', () => {
total: 1,
},
});
services.alertsClient.report.mockReturnValue({ uuid: 'test-uuid' });
const params = {
threshold: 3000,
@ -55,28 +55,41 @@ describe('registerTransactionDurationRuleType', () => {
transactionName: 'GET /orders',
};
await executor({ params });
expect(scheduleActions).toHaveBeenCalledTimes(1);
expect(scheduleActions).toHaveBeenCalledWith('threshold_met', {
alertDetailsUrl: expect.stringContaining(
'http://localhost:5601/eyr/app/observability/alerts/'
),
transactionName: 'GET /orders',
environment: 'development',
interval: `5 mins`,
reason:
'Avg. latency is 5.5 s in the last 5 mins for service: opbeans-java, env: development, type: request. Alert when > 3.0 s.',
transactionType: 'request',
serviceName: 'opbeans-java',
threshold: 3000,
triggerValue: '5,500 ms',
viewInAppUrl:
'http://localhost:5601/eyr/app/apm/services/opbeans-java?transactionType=request&environment=development',
expect(services.alertsClient.setAlertData).toHaveBeenCalledTimes(1);
expect(services.alertsClient.setAlertData).toHaveBeenCalledWith({
context: {
alertDetailsUrl: expect.stringContaining(
'http://localhost:5601/eyr/app/observability/alerts/'
),
environment: 'development',
interval: '5 mins',
reason:
'Avg. latency is 5.5 s in the last 5 mins for service: opbeans-java, env: development, type: request. Alert when > 3.0 s.',
serviceName: 'opbeans-java',
threshold: 3000,
transactionName: 'GET /orders',
transactionType: 'request',
triggerValue: '5,500 ms',
viewInAppUrl:
'http://localhost:5601/eyr/app/apm/services/opbeans-java?transactionType=request&environment=development',
},
id: 'opbeans-java_development_request',
payload: {
'kibana.alert.evaluation.threshold': 3000000,
'kibana.alert.evaluation.value': 5500000,
'kibana.alert.reason':
'Avg. latency is 5.5 s in the last 5 mins for service: opbeans-java, env: development, type: request. Alert when > 3.0 s.',
'processor.event': 'transaction',
'service.environment': 'development',
'service.name': 'opbeans-java',
'transaction.name': 'GET /orders',
'transaction.type': 'request',
},
});
});
it('sends alert when rule is configured with group by on transaction.name', async () => {
const { services, dependencies, executor, scheduleActions } =
createRuleTypeMocks();
const { services, dependencies, executor } = createRuleTypeMocks();
registerTransactionDurationRuleType(dependencies);
@ -109,6 +122,7 @@ describe('registerTransactionDurationRuleType', () => {
total: 1,
},
});
services.alertsClient.report.mockReturnValue({ uuid: 'test-uuid' });
const params = {
threshold: 3000,
@ -125,28 +139,41 @@ describe('registerTransactionDurationRuleType', () => {
],
};
await executor({ params });
expect(scheduleActions).toHaveBeenCalledTimes(1);
expect(scheduleActions).toHaveBeenCalledWith('threshold_met', {
alertDetailsUrl: expect.stringContaining(
'http://localhost:5601/eyr/app/observability/alerts/'
),
environment: 'development',
interval: `5 mins`,
reason:
'Avg. latency is 5.5 s in the last 5 mins for service: opbeans-java, env: development, type: request, name: GET /products. Alert when > 3.0 s.',
transactionType: 'request',
serviceName: 'opbeans-java',
threshold: 3000,
triggerValue: '5,500 ms',
viewInAppUrl:
'http://localhost:5601/eyr/app/apm/services/opbeans-java?transactionType=request&environment=development',
transactionName: 'GET /products',
expect(services.alertsClient.setAlertData).toHaveBeenCalledTimes(1);
expect(services.alertsClient.setAlertData).toHaveBeenCalledWith({
context: {
alertDetailsUrl: expect.stringContaining(
'http://localhost:5601/eyr/app/observability/alerts/'
),
environment: 'development',
interval: '5 mins',
reason:
'Avg. latency is 5.5 s in the last 5 mins for service: opbeans-java, env: development, type: request, name: GET /products. Alert when > 3.0 s.',
serviceName: 'opbeans-java',
threshold: 3000,
transactionName: 'GET /products',
transactionType: 'request',
triggerValue: '5,500 ms',
viewInAppUrl:
'http://localhost:5601/eyr/app/apm/services/opbeans-java?transactionType=request&environment=development',
},
id: 'opbeans-java_development_request_GET /products',
payload: {
'kibana.alert.evaluation.threshold': 3000000,
'kibana.alert.evaluation.value': 5500000,
'kibana.alert.reason':
'Avg. latency is 5.5 s in the last 5 mins for service: opbeans-java, env: development, type: request, name: GET /products. Alert when > 3.0 s.',
'processor.event': 'transaction',
'service.environment': 'development',
'service.name': 'opbeans-java',
'transaction.name': 'GET /products',
'transaction.type': 'request',
},
});
});
it('sends alert when rule is configured with preselected group by', async () => {
const { services, dependencies, executor, scheduleActions } =
createRuleTypeMocks();
const { services, dependencies, executor } = createRuleTypeMocks();
registerTransactionDurationRuleType(dependencies);
@ -179,6 +206,7 @@ describe('registerTransactionDurationRuleType', () => {
total: 1,
},
});
services.alertsClient.report.mockReturnValue({ uuid: 'test-uuid' });
const params = {
threshold: 3000,
@ -191,27 +219,41 @@ describe('registerTransactionDurationRuleType', () => {
};
await executor({ params });
expect(scheduleActions).toHaveBeenCalledTimes(1);
expect(scheduleActions).toHaveBeenCalledWith('threshold_met', {
alertDetailsUrl: expect.stringContaining(
'http://localhost:5601/eyr/app/observability/alerts/'
),
environment: 'development',
interval: `5 mins`,
reason:
'Avg. latency is 5.5 s in the last 5 mins for service: opbeans-java, env: development, type: request. Alert when > 3.0 s.',
transactionType: 'request',
serviceName: 'opbeans-java',
threshold: 3000,
triggerValue: '5,500 ms',
viewInAppUrl:
'http://localhost:5601/eyr/app/apm/services/opbeans-java?transactionType=request&environment=development',
expect(services.alertsClient.setAlertData).toHaveBeenCalledTimes(1);
expect(services.alertsClient.setAlertData).toHaveBeenCalledWith({
context: {
alertDetailsUrl: expect.stringContaining(
'http://localhost:5601/eyr/app/observability/alerts/'
),
environment: 'development',
interval: '5 mins',
reason:
'Avg. latency is 5.5 s in the last 5 mins for service: opbeans-java, env: development, type: request. Alert when > 3.0 s.',
serviceName: 'opbeans-java',
threshold: 3000,
transactionName: undefined,
transactionType: 'request',
triggerValue: '5,500 ms',
viewInAppUrl:
'http://localhost:5601/eyr/app/apm/services/opbeans-java?transactionType=request&environment=development',
},
id: 'opbeans-java_development_request',
payload: {
'kibana.alert.evaluation.threshold': 3000000,
'kibana.alert.evaluation.value': 5500000,
'kibana.alert.reason':
'Avg. latency is 5.5 s in the last 5 mins for service: opbeans-java, env: development, type: request. Alert when > 3.0 s.',
'processor.event': 'transaction',
'service.environment': 'development',
'service.name': 'opbeans-java',
'transaction.name': undefined,
'transaction.type': 'request',
},
});
});
it('sends alert when service.environment field does not exist in the source', async () => {
const { services, dependencies, executor, scheduleActions } =
createRuleTypeMocks();
const { services, dependencies, executor } = createRuleTypeMocks();
registerTransactionDurationRuleType(dependencies);
@ -249,6 +291,7 @@ describe('registerTransactionDurationRuleType', () => {
total: 1,
},
});
services.alertsClient.report.mockReturnValue({ uuid: 'test-uuid' });
const params = {
threshold: 3000,
@ -265,28 +308,41 @@ describe('registerTransactionDurationRuleType', () => {
],
};
await executor({ params });
expect(scheduleActions).toHaveBeenCalledTimes(1);
expect(scheduleActions).toHaveBeenCalledWith('threshold_met', {
alertDetailsUrl: expect.stringContaining(
'http://localhost:5601/eyr/app/observability/alerts/'
),
environment: 'Not defined',
interval: `5 mins`,
reason:
'Avg. latency is 5.5 s in the last 5 mins for service: opbeans-java, env: Not defined, type: request, name: tx-java. Alert when > 3.0 s.',
transactionType: 'request',
serviceName: 'opbeans-java',
threshold: 3000,
triggerValue: '5,500 ms',
viewInAppUrl:
'http://localhost:5601/eyr/app/apm/services/opbeans-java?transactionType=request&environment=ENVIRONMENT_ALL',
transactionName: 'tx-java',
expect(services.alertsClient.setAlertData).toHaveBeenCalledTimes(1);
expect(services.alertsClient.setAlertData).toHaveBeenCalledWith({
context: {
alertDetailsUrl: expect.stringContaining(
'http://localhost:5601/eyr/app/observability/alerts/'
),
environment: 'Not defined',
interval: '5 mins',
reason:
'Avg. latency is 5.5 s in the last 5 mins for service: opbeans-java, env: Not defined, type: request, name: tx-java. Alert when > 3.0 s.',
serviceName: 'opbeans-java',
threshold: 3000,
transactionName: 'tx-java',
transactionType: 'request',
triggerValue: '5,500 ms',
viewInAppUrl:
'http://localhost:5601/eyr/app/apm/services/opbeans-java?transactionType=request&environment=ENVIRONMENT_ALL',
},
id: 'opbeans-java_ENVIRONMENT_NOT_DEFINED_request_tx-java',
payload: {
'kibana.alert.evaluation.threshold': 3000000,
'kibana.alert.evaluation.value': 5500000,
'kibana.alert.reason':
'Avg. latency is 5.5 s in the last 5 mins for service: opbeans-java, env: Not defined, type: request, name: tx-java. Alert when > 3.0 s.',
'processor.event': 'transaction',
'service.environment': 'ENVIRONMENT_NOT_DEFINED',
'service.name': 'opbeans-java',
'transaction.name': 'tx-java',
'transaction.type': 'request',
},
});
});
it('sends alert when rule is configured with a filter query', async () => {
const { services, dependencies, executor, scheduleActions } =
createRuleTypeMocks();
const { services, dependencies, executor } = createRuleTypeMocks();
registerTransactionDurationRuleType(dependencies);
@ -319,6 +375,7 @@ describe('registerTransactionDurationRuleType', () => {
total: 1,
},
});
services.alertsClient.report.mockReturnValue({ uuid: 'test-uuid' });
const params = {
threshold: 3000,
@ -337,21 +394,36 @@ describe('registerTransactionDurationRuleType', () => {
};
await executor({ params });
expect(scheduleActions).toHaveBeenCalledTimes(1);
expect(scheduleActions).toHaveBeenCalledWith('threshold_met', {
alertDetailsUrl: expect.stringContaining(
'http://localhost:5601/eyr/app/observability/alerts/'
),
environment: 'development',
interval: `5 mins`,
reason:
'Avg. latency is 5.5 s in the last 5 mins for service: opbeans-java, env: development, type: request. Alert when > 3.0 s.',
transactionType: 'request',
serviceName: 'opbeans-java',
threshold: 3000,
triggerValue: '5,500 ms',
viewInAppUrl:
'http://localhost:5601/eyr/app/apm/services/opbeans-java?transactionType=request&environment=development',
expect(services.alertsClient.setAlertData).toHaveBeenCalledTimes(1);
expect(services.alertsClient.setAlertData).toHaveBeenCalledWith({
context: {
alertDetailsUrl: expect.stringContaining(
'http://localhost:5601/eyr/app/observability/alerts/'
),
environment: 'development',
interval: '5 mins',
reason:
'Avg. latency is 5.5 s in the last 5 mins for service: opbeans-java, env: development, type: request. Alert when > 3.0 s.',
serviceName: 'opbeans-java',
threshold: 3000,
transactionName: undefined,
transactionType: 'request',
triggerValue: '5,500 ms',
viewInAppUrl:
'http://localhost:5601/eyr/app/apm/services/opbeans-java?transactionType=request&environment=development',
},
id: 'opbeans-java_development_request',
payload: {
'kibana.alert.evaluation.threshold': 3000000,
'kibana.alert.evaluation.value': 5500000,
'kibana.alert.reason':
'Avg. latency is 5.5 s in the last 5 mins for service: opbeans-java, env: development, type: request. Alert when > 3.0 s.',
'processor.event': 'transaction',
'service.environment': 'development',
'service.name': 'opbeans-java',
'transaction.name': undefined,
'transaction.type': 'request',
},
});
});
});

View file

@ -7,7 +7,16 @@
import { DEFAULT_APP_CATEGORIES } from '@kbn/core/server';
import { QueryDslQueryContainer } from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import { GetViewInAppRelativeUrlFnOpts } from '@kbn/alerting-plugin/server';
import {
AlertsClientError,
GetViewInAppRelativeUrlFnOpts,
ActionGroupIdsOf,
AlertInstanceContext as AlertContext,
AlertInstanceState as AlertState,
RuleTypeState,
RuleExecutorOptions,
IRuleTypeAlerts,
} from '@kbn/alerting-plugin/server';
import {
asDuration,
formatDurationFromTimeUnitChar,
@ -26,7 +35,7 @@ import {
ALERT_REASON,
ApmRuleType,
} from '@kbn/rule-data-utils';
import { createLifecycleRuleTypeFactory } from '@kbn/rule-registry-plugin/server';
import { ObservabilityApmAlert } from '@kbn/alerts-as-data-utils';
import { addSpaceIdToPath } from '@kbn/spaces-plugin/common';
import { getGroupByTerms } from '../utils/get_groupby_terms';
import { SearchAggregatedTransactionSetting } from '../../../../../common/aggregated_transactions';
@ -42,8 +51,12 @@ import {
APM_SERVER_FEATURE_ID,
formatTransactionDurationReason,
RULE_TYPES_CONFIG,
THRESHOLD_MET_GROUP,
} from '../../../../../common/rules/apm_rule_types';
import { transactionDurationParamsSchema } from '../../../../../common/rules/schema';
import {
transactionDurationParamsSchema,
ApmRuleParamsType,
} from '../../../../../common/rules/schema';
import { environmentQuery } from '../../../../../common/utils/environment_query';
import {
getAlertUrlTransaction,
@ -85,20 +98,29 @@ export const transactionDurationActionVariables = [
apmActionVariables.viewInAppUrl,
];
type TransactionDurationRuleTypeParams =
ApmRuleParamsType[ApmRuleType.TransactionDuration];
type TransactionDurationActionGroups = ActionGroupIdsOf<
typeof THRESHOLD_MET_GROUP
>;
type TransactionDurationRuleTypeState = RuleTypeState;
type TransactionDurationAlertState = AlertState;
type TransactionDurationAlertContext = AlertContext;
type TransactionDurationAlert = ObservabilityApmAlert;
export function registerTransactionDurationRuleType({
alerting,
apmConfig,
ruleDataClient,
getApmIndices,
logger,
basePath,
}: RegisterRuleDependencies) {
const createLifecycleRuleType = createLifecycleRuleTypeFactory({
ruleDataClient,
logger,
});
if (!alerting) {
throw new Error(
'Cannot register transaction duration rule type. Both the actions and alerting plugins need to be enabled.'
);
}
const ruleType = createLifecycleRuleType({
alerting.registerType({
id: ApmRuleType.TransactionDuration,
name: ruleTypeConfig.name,
actionGroups: ruleTypeConfig.actionGroups,
@ -117,20 +139,28 @@ export function registerTransactionDurationRuleType({
producer: APM_SERVER_FEATURE_ID,
minimumLicenseRequired: 'basic',
isExportable: true,
executor: async ({
params: ruleParams,
services,
spaceId,
getTimeRange,
}) => {
executor: async (
options: RuleExecutorOptions<
TransactionDurationRuleTypeParams,
TransactionDurationRuleTypeState,
TransactionDurationAlertState,
TransactionDurationAlertContext,
TransactionDurationActionGroups,
TransactionDurationAlert
>
) => {
const { params: ruleParams, services, spaceId, getTimeRange } = options;
const { alertsClient, savedObjectsClient, scopedClusterClient } =
services;
if (!alertsClient) {
throw new AlertsClientError();
}
const allGroupByFields = getAllGroupByFields(
ApmRuleType.TransactionDuration,
ruleParams.groupBy
);
const { getAlertUuid, savedObjectsClient, scopedClusterClient } =
services;
const indices = await getApmIndices(savedObjectsClient);
// only query transaction events when set to 'never',
@ -275,25 +305,12 @@ export function registerTransactionDurationRuleType({
});
const alertId = bucketKey.join('_');
const alert = services.alertWithLifecycle({
const { uuid } = alertsClient.report({
id: alertId,
fields: {
[TRANSACTION_NAME]: ruleParams.transactionName,
[PROCESSOR_EVENT]: ProcessorEvent.transaction,
[ALERT_EVALUATION_VALUE]: transactionDuration,
[ALERT_EVALUATION_THRESHOLD]: thresholdMicroseconds,
[ALERT_REASON]: reason,
...sourceFields,
...groupByFields,
},
actionGroup: ruleTypeConfig.defaultActionGroupId,
});
const alertUuid = getAlertUuid(alertId);
const alertDetailsUrl = getAlertDetailsUrl(
basePath,
spaceId,
alertUuid
);
const alertDetailsUrl = getAlertDetailsUrl(basePath, spaceId, uuid);
const viewInAppUrl = addSpaceIdToPath(
basePath.publicBaseUrl,
spaceId,
@ -306,7 +323,8 @@ export function registerTransactionDurationRuleType({
)
);
const groupByActionVariables = getGroupByActionVariables(groupByFields);
alert.scheduleActions(ruleTypeConfig.defaultActionGroupId, {
const context = {
alertDetailsUrl,
interval: formatDurationFromTimeUnitChar(
ruleParams.windowSize,
@ -319,15 +337,32 @@ export function registerTransactionDurationRuleType({
triggerValue: transactionDurationFormatted,
viewInAppUrl,
...groupByActionVariables,
};
const payload = {
[TRANSACTION_NAME]: ruleParams.transactionName,
[PROCESSOR_EVENT]: ProcessorEvent.transaction,
[ALERT_EVALUATION_VALUE]: transactionDuration,
[ALERT_EVALUATION_THRESHOLD]: thresholdMicroseconds,
[ALERT_REASON]: reason,
...sourceFields,
...groupByFields,
};
alertsClient.setAlertData({
id: alertId,
payload,
context,
});
}
return { state: {} };
},
alerts: ApmRuleTypeAlertDefinition,
alerts: {
...ApmRuleTypeAlertDefinition,
shouldWrite: true,
} as IRuleTypeAlerts<TransactionDurationAlert>,
getViewInAppRelativeUrl: ({ rule }: GetViewInAppRelativeUrlFnOpts<{}>) =>
observabilityPaths.ruleDetails(rule.id),
});
alerting.registerType(ruleType);
}

View file

@ -47,6 +47,10 @@ export const createRuleTypeMocks = () => {
alertWithLifecycle: jest.fn(),
logger: loggerMock,
shouldWriteAlerts: () => true,
alertsClient: {
report: jest.fn(),
setAlertData: jest.fn(),
},
};
const dependencies = {