Onboard Transaction Error Rate rule type with FAAD (#179496)

Towards: https://github.com/elastic/kibana/issues/169867

This PR onboards the Transaction Error Rate rule type with FAAD.

### To verify

1. Run the following script to generate APM data:
```
node scripts/synthtrace many_errors.ts --local --live
```

2. Create a transaction error rate rule.
Example:
```
POST kbn:/api/alerting/rule
{
  "params": {
    "threshold": 0,
    "windowSize": 5,
    "windowUnit": "m",
    "environment": "ENVIRONMENT_ALL"
  },
  "consumer": "alerts",
  "schedule": {
    "interval": "1m"
  },
  "tags": [],
  "name": "test",
  "rule_type_id": "apm.transaction_error_rate",
  "notify_when": "onActionGroupChange",
  "actions": []
}
```
3. Your rule should create an alert and save it in
`.internal.alerts-observability.apm.alerts-default-000001`
Example:
```
GET .internal.alerts-*/_search
```
4. Recover the alert by setting `threshold: 200`

5. The alert should be recovered and the alert document in the above index
should be updated to `kibana.alert.status: recovered`.
This commit is contained in:
Alexi Doak 2024-04-18 08:44:16 -07:00 committed by GitHub
parent 5be49cac39
commit 48e61e8fa1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 431 additions and 802 deletions

View file

@ -15,7 +15,7 @@ import {
import { ObservabilityPluginSetup } from '@kbn/observability-plugin/server';
import { IRuleDataClient } from '@kbn/rule-registry-plugin/server';
import { MlPluginSetup } from '@kbn/ml-plugin/server';
import { legacyExperimentalFieldMap } from '@kbn/alerts-as-data-utils';
import { legacyExperimentalFieldMap, ObservabilityApmAlert } from '@kbn/alerts-as-data-utils';
import type { APMIndices } from '@kbn/apm-data-access-plugin/server';
import {
AGENT_NAME,
@ -92,11 +92,11 @@ export const apmRuleTypeAlertFieldMap = {
};
// Defines which alerts-as-data index alerts will use
export const ApmRuleTypeAlertDefinition: IRuleTypeAlerts = {
export const ApmRuleTypeAlertDefinition: IRuleTypeAlerts<ObservabilityApmAlert> = {
context: APM_RULE_TYPE_ALERT_CONTEXT,
mappings: { fieldMap: apmRuleTypeAlertFieldMap },
useLegacyAlerts: true,
shouldWrite: false,
shouldWrite: true,
};
export interface RegisterRuleDependencies {

View file

@ -14,7 +14,6 @@ import {
RuleTypeState,
RuleExecutorOptions,
AlertsClientError,
IRuleTypeAlerts,
} from '@kbn/alerting-plugin/server';
import { KibanaRequest, DEFAULT_APP_CATEGORIES } from '@kbn/core/server';
import datemath from '@kbn/datemath';
@ -359,10 +358,7 @@ export function registerAnomalyRuleType({
return { state: {} };
},
alerts: {
...ApmRuleTypeAlertDefinition,
shouldWrite: true,
} as IRuleTypeAlerts<AnomalyAlert>,
alerts: ApmRuleTypeAlertDefinition,
getViewInAppRelativeUrl: ({ rule }: GetViewInAppRelativeUrlFnOpts<{}>) =>
observabilityPaths.ruleDetails(rule.id),
});

View file

@ -14,7 +14,6 @@ import {
RuleTypeState,
RuleExecutorOptions,
AlertsClientError,
IRuleTypeAlerts,
} from '@kbn/alerting-plugin/server';
import {
formatDurationFromTimeUnitChar,
@ -280,10 +279,7 @@ export function registerErrorCountRuleType({
return { state: {} };
},
alerts: {
...ApmRuleTypeAlertDefinition,
shouldWrite: true,
} as IRuleTypeAlerts<ErrorCountAlert>,
alerts: ApmRuleTypeAlertDefinition,
getViewInAppRelativeUrl: ({ rule }: GetViewInAppRelativeUrlFnOpts<{}>) =>
observabilityPaths.ruleDetails(rule.id),
});

View file

@ -15,7 +15,6 @@ import {
AlertInstanceState as AlertState,
RuleTypeState,
RuleExecutorOptions,
IRuleTypeAlerts,
} from '@kbn/alerting-plugin/server';
import {
asDuration,
@ -328,10 +327,7 @@ export function registerTransactionDurationRuleType({
return { state: {} };
},
alerts: {
...ApmRuleTypeAlertDefinition,
shouldWrite: true,
} as IRuleTypeAlerts<TransactionDurationAlert>,
alerts: ApmRuleTypeAlertDefinition,
getViewInAppRelativeUrl: ({ rule }: GetViewInAppRelativeUrlFnOpts<{}>) =>
observabilityPaths.ruleDetails(rule.id),
});

View file

@ -42,11 +42,11 @@ describe('Transaction error rate alert', () => {
});
await executor({ params });
expect(services.alertFactory.create).not.toBeCalled();
expect(services.alertsClient.report).not.toBeCalled();
});
it('sends alerts for services that exceeded the threshold', async () => {
const { services, dependencies, executor, scheduleActions } = createRuleTypeMocks();
const { services, dependencies, executor } = createRuleTypeMocks();
registerTransactionErrorRateRuleType({
...dependencies,
@ -106,6 +106,8 @@ describe('Transaction error rate alert', () => {
},
});
services.alertsClient.report.mockReturnValue({ uuid: 'test-uuid' });
const params = {
threshold: 10,
windowSize: 5,
@ -114,28 +116,49 @@ describe('Transaction error rate alert', () => {
await executor({ params });
expect(services.alertFactory.create).toHaveBeenCalledTimes(1);
expect(services.alertsClient.report).toHaveBeenCalledTimes(1);
expect(services.alertFactory.create).toHaveBeenCalledWith('foo_env-foo_type-foo');
expect(services.alertFactory.create).not.toHaveBeenCalledWith('bar_env-bar_type-bar');
expect(services.alertsClient.report).toHaveBeenCalledWith({
actionGroup: 'threshold_met',
id: 'foo_env-foo_type-foo',
});
expect(services.alertsClient.report).not.toHaveBeenCalledWith({
actionGroup: 'threshold_met',
id: 'bar_env-bar_type-bar',
});
expect(scheduleActions).toHaveBeenCalledWith('threshold_met', {
serviceName: 'foo',
transactionType: 'type-foo',
environment: 'env-foo',
reason:
'Failed transactions is 10% in the last 5 mins for service: foo, env: env-foo, type: type-foo. Alert when > 10%.',
threshold: 10,
triggerValue: '10',
interval: '5 mins',
viewInAppUrl:
'http://localhost:5601/eyr/app/apm/services/foo?transactionType=type-foo&environment=env-foo',
alertDetailsUrl: 'mockedAlertsLocator > getLocation',
expect(services.alertsClient.setAlertData).toHaveBeenCalledWith({
context: {
alertDetailsUrl: 'mockedAlertsLocator > getLocation',
environment: 'env-foo',
interval: '5 mins',
reason:
'Failed transactions is 10% in the last 5 mins for service: foo, env: env-foo, type: type-foo. Alert when > 10%.',
serviceName: 'foo',
threshold: 10,
transactionName: undefined,
transactionType: 'type-foo',
triggerValue: '10',
viewInAppUrl:
'http://localhost:5601/eyr/app/apm/services/foo?transactionType=type-foo&environment=env-foo',
},
id: 'foo_env-foo_type-foo',
payload: {
'kibana.alert.evaluation.threshold': 10,
'kibana.alert.evaluation.value': 10,
'kibana.alert.reason':
'Failed transactions is 10% in the last 5 mins for service: foo, env: env-foo, type: type-foo. Alert when > 10%.',
'processor.event': 'transaction',
'service.environment': 'env-foo',
'service.name': 'foo',
'transaction.name': undefined,
'transaction.type': 'type-foo',
},
});
});
it('sends alert when rule is configured with group by on transaction.name', async () => {
const { services, dependencies, executor, scheduleActions } = createRuleTypeMocks();
const { services, dependencies, executor } = createRuleTypeMocks();
registerTransactionErrorRateRuleType({
...dependencies,
@ -195,6 +218,8 @@ describe('Transaction error rate alert', () => {
},
});
services.alertsClient.report.mockReturnValue({ uuid: 'test-uuid' });
const params = {
threshold: 10,
windowSize: 5,
@ -204,31 +229,49 @@ describe('Transaction error rate alert', () => {
await executor({ params });
expect(services.alertFactory.create).toHaveBeenCalledTimes(1);
expect(services.alertsClient.report).toHaveBeenCalledTimes(1);
expect(services.alertFactory.create).toHaveBeenCalledWith('foo_env-foo_type-foo_tx-name-foo');
expect(services.alertFactory.create).not.toHaveBeenCalledWith(
'bar_env-bar_type-bar_tx-name-bar'
);
expect(services.alertsClient.report).toHaveBeenCalledWith({
actionGroup: 'threshold_met',
id: 'foo_env-foo_type-foo_tx-name-foo',
});
expect(services.alertsClient.report).not.toHaveBeenCalledWith({
actionGroup: 'threshold_met',
id: 'bar_env-bar_type-bar_tx-name-bar',
});
expect(scheduleActions).toHaveBeenCalledWith('threshold_met', {
serviceName: 'foo',
transactionType: 'type-foo',
environment: 'env-foo',
reason:
'Failed transactions is 10% in the last 5 mins for service: foo, env: env-foo, type: type-foo, name: tx-name-foo. Alert when > 10%.',
threshold: 10,
triggerValue: '10',
interval: '5 mins',
viewInAppUrl:
'http://localhost:5601/eyr/app/apm/services/foo?transactionType=type-foo&environment=env-foo',
transactionName: 'tx-name-foo',
alertDetailsUrl: 'mockedAlertsLocator > getLocation',
expect(services.alertsClient.setAlertData).toHaveBeenCalledWith({
context: {
alertDetailsUrl: 'mockedAlertsLocator > getLocation',
environment: 'env-foo',
interval: '5 mins',
reason:
'Failed transactions is 10% in the last 5 mins for service: foo, env: env-foo, type: type-foo, name: tx-name-foo. Alert when > 10%.',
serviceName: 'foo',
threshold: 10,
transactionName: 'tx-name-foo',
transactionType: 'type-foo',
triggerValue: '10',
viewInAppUrl:
'http://localhost:5601/eyr/app/apm/services/foo?transactionType=type-foo&environment=env-foo',
},
id: 'foo_env-foo_type-foo_tx-name-foo',
payload: {
'kibana.alert.evaluation.threshold': 10,
'kibana.alert.evaluation.value': 10,
'kibana.alert.reason':
'Failed transactions is 10% in the last 5 mins for service: foo, env: env-foo, type: type-foo, name: tx-name-foo. Alert when > 10%.',
'processor.event': 'transaction',
'service.environment': 'env-foo',
'service.name': 'foo',
'transaction.name': 'tx-name-foo',
'transaction.type': 'type-foo',
},
});
});
it('sends alert when rule is configured with preselected group by', async () => {
const { services, dependencies, executor, scheduleActions } = createRuleTypeMocks();
const { services, dependencies, executor } = createRuleTypeMocks();
registerTransactionErrorRateRuleType({
...dependencies,
@ -295,30 +338,53 @@ describe('Transaction error rate alert', () => {
groupBy: ['service.name', 'service.environment', 'transaction.type'],
};
services.alertsClient.report.mockReturnValue({ uuid: 'test-uuid' });
await executor({ params });
expect(services.alertFactory.create).toHaveBeenCalledTimes(1);
expect(services.alertsClient.report).toHaveBeenCalledTimes(1);
expect(services.alertFactory.create).toHaveBeenCalledWith('foo_env-foo_type-foo');
expect(services.alertFactory.create).not.toHaveBeenCalledWith('bar_env-bar_type-bar');
expect(services.alertsClient.report).toHaveBeenCalledWith({
actionGroup: 'threshold_met',
id: 'foo_env-foo_type-foo',
});
expect(services.alertsClient.report).not.toHaveBeenCalledWith({
actionGroup: 'threshold_met',
id: 'bar_env-bar_type-bar',
});
expect(scheduleActions).toHaveBeenCalledWith('threshold_met', {
serviceName: 'foo',
transactionType: 'type-foo',
environment: 'env-foo',
reason:
'Failed transactions is 10% in the last 5 mins for service: foo, env: env-foo, type: type-foo. Alert when > 10%.',
threshold: 10,
triggerValue: '10',
interval: '5 mins',
viewInAppUrl:
'http://localhost:5601/eyr/app/apm/services/foo?transactionType=type-foo&environment=env-foo',
alertDetailsUrl: 'mockedAlertsLocator > getLocation',
expect(services.alertsClient.setAlertData).toHaveBeenCalledWith({
context: {
alertDetailsUrl: 'mockedAlertsLocator > getLocation',
environment: 'env-foo',
interval: '5 mins',
reason:
'Failed transactions is 10% in the last 5 mins for service: foo, env: env-foo, type: type-foo. Alert when > 10%.',
serviceName: 'foo',
threshold: 10,
transactionName: undefined,
transactionType: 'type-foo',
triggerValue: '10',
viewInAppUrl:
'http://localhost:5601/eyr/app/apm/services/foo?transactionType=type-foo&environment=env-foo',
},
id: 'foo_env-foo_type-foo',
payload: {
'kibana.alert.evaluation.threshold': 10,
'kibana.alert.evaluation.value': 10,
'kibana.alert.reason':
'Failed transactions is 10% in the last 5 mins for service: foo, env: env-foo, type: type-foo. Alert when > 10%.',
'processor.event': 'transaction',
'service.environment': 'env-foo',
'service.name': 'foo',
'transaction.name': undefined,
'transaction.type': 'type-foo',
},
});
});
it('sends alert when service.environment field does not exist in the source', async () => {
const { services, dependencies, executor, scheduleActions } = createRuleTypeMocks();
const { services, dependencies, executor } = createRuleTypeMocks();
registerTransactionErrorRateRuleType({
...dependencies,
@ -378,6 +444,8 @@ describe('Transaction error rate alert', () => {
},
});
services.alertsClient.report.mockReturnValue({ uuid: 'test-uuid' });
const params = {
threshold: 10,
windowSize: 5,
@ -387,32 +455,49 @@ describe('Transaction error rate alert', () => {
await executor({ params });
expect(services.alertFactory.create).toHaveBeenCalledTimes(1);
expect(services.alertsClient.report).toHaveBeenCalledTimes(1);
expect(services.alertFactory.create).toHaveBeenCalledWith(
'foo_ENVIRONMENT_NOT_DEFINED_type-foo'
);
expect(services.alertFactory.create).not.toHaveBeenCalledWith(
'bar_ENVIRONMENT_NOT_DEFINED_type-bar'
);
expect(services.alertsClient.report).toHaveBeenCalledWith({
actionGroup: 'threshold_met',
id: 'foo_ENVIRONMENT_NOT_DEFINED_type-foo',
});
expect(services.alertsClient.report).not.toHaveBeenCalledWith({
actionGroup: 'threshold_met',
id: 'bar_ENVIRONMENT_NOT_DEFINED_type-bar',
});
expect(scheduleActions).toHaveBeenCalledWith('threshold_met', {
serviceName: 'foo',
transactionType: 'type-foo',
environment: 'Not defined',
reason:
'Failed transactions is 10% in the last 5 mins for service: foo, env: Not defined, type: type-foo. Alert when > 10%.',
threshold: 10,
triggerValue: '10',
interval: '5 mins',
viewInAppUrl:
'http://localhost:5601/eyr/app/apm/services/foo?transactionType=type-foo&environment=ENVIRONMENT_ALL',
alertDetailsUrl: 'mockedAlertsLocator > getLocation',
expect(services.alertsClient.setAlertData).toHaveBeenCalledWith({
context: {
alertDetailsUrl: 'mockedAlertsLocator > getLocation',
environment: 'Not defined',
interval: '5 mins',
reason:
'Failed transactions is 10% in the last 5 mins for service: foo, env: Not defined, type: type-foo. Alert when > 10%.',
serviceName: 'foo',
threshold: 10,
transactionName: undefined,
transactionType: 'type-foo',
triggerValue: '10',
viewInAppUrl:
'http://localhost:5601/eyr/app/apm/services/foo?transactionType=type-foo&environment=ENVIRONMENT_ALL',
},
id: 'foo_ENVIRONMENT_NOT_DEFINED_type-foo',
payload: {
'kibana.alert.evaluation.threshold': 10,
'kibana.alert.evaluation.value': 10,
'kibana.alert.reason':
'Failed transactions is 10% in the last 5 mins for service: foo, env: Not defined, type: type-foo. Alert when > 10%.',
'processor.event': 'transaction',
'service.environment': 'ENVIRONMENT_NOT_DEFINED',
'service.name': 'foo',
'transaction.name': undefined,
'transaction.type': 'type-foo',
},
});
});
it('sends alert when rule is configured with a filter query', async () => {
const { services, dependencies, executor, scheduleActions } = createRuleTypeMocks();
const { services, dependencies, executor } = createRuleTypeMocks();
registerTransactionErrorRateRuleType({
...dependencies,
@ -457,6 +542,8 @@ describe('Transaction error rate alert', () => {
},
});
services.alertsClient.report.mockReturnValue({ uuid: 'test-uuid' });
const params = {
threshold: 10,
windowSize: 5,
@ -473,22 +560,40 @@ describe('Transaction error rate alert', () => {
await executor({ params });
expect(services.alertFactory.create).toHaveBeenCalledTimes(1);
expect(services.alertsClient.report).toHaveBeenCalledTimes(1);
expect(services.alertFactory.create).toHaveBeenCalledWith('bar_env-bar_type-bar');
expect(services.alertsClient.report).toHaveBeenCalledWith({
actionGroup: 'threshold_met',
id: 'bar_env-bar_type-bar',
});
expect(scheduleActions).toHaveBeenCalledWith('threshold_met', {
serviceName: 'bar',
transactionType: 'type-bar',
environment: 'env-bar',
reason:
'Failed transactions is 10% in the last 5 mins for service: bar, env: env-bar, type: type-bar. Alert when > 10%.',
threshold: 10,
triggerValue: '10',
interval: '5 mins',
viewInAppUrl:
'http://localhost:5601/eyr/app/apm/services/bar?transactionType=type-bar&environment=env-bar',
alertDetailsUrl: 'mockedAlertsLocator > getLocation',
expect(services.alertsClient.setAlertData).toHaveBeenCalledWith({
context: {
alertDetailsUrl: 'mockedAlertsLocator > getLocation',
environment: 'env-bar',
interval: '5 mins',
reason:
'Failed transactions is 10% in the last 5 mins for service: bar, env: env-bar, type: type-bar. Alert when > 10%.',
serviceName: 'bar',
threshold: 10,
transactionName: undefined,
transactionType: 'type-bar',
triggerValue: '10',
viewInAppUrl:
'http://localhost:5601/eyr/app/apm/services/bar?transactionType=type-bar&environment=env-bar',
},
id: 'bar_env-bar_type-bar',
payload: {
'kibana.alert.evaluation.threshold': 10,
'kibana.alert.evaluation.value': 10,
'kibana.alert.reason':
'Failed transactions is 10% in the last 5 mins for service: bar, env: env-bar, type: type-bar. Alert when > 10%.',
'processor.event': 'transaction',
'service.environment': 'env-bar',
'service.name': 'bar',
'transaction.name': undefined,
'transaction.type': 'type-bar',
},
});
});
});

View file

@ -6,7 +6,15 @@
*/
import { DEFAULT_APP_CATEGORIES } from '@kbn/core/server';
import { GetViewInAppRelativeUrlFnOpts } from '@kbn/alerting-plugin/server';
import {
GetViewInAppRelativeUrlFnOpts,
ActionGroupIdsOf,
AlertInstanceContext as AlertContext,
AlertInstanceState as AlertState,
RuleTypeState,
RuleExecutorOptions,
AlertsClientError,
} from '@kbn/alerting-plugin/server';
import {
formatDurationFromTimeUnitChar,
getAlertUrl,
@ -22,7 +30,7 @@ import {
ALERT_REASON,
ApmRuleType,
} from '@kbn/rule-data-utils';
import { createLifecycleRuleTypeFactory } from '@kbn/rule-registry-plugin/server';
import { ObservabilityApmAlert } from '@kbn/alerts-as-data-utils';
import { addSpaceIdToPath } from '@kbn/spaces-plugin/common';
import { asyncForEach } from '@kbn/std';
import { SearchAggregatedTransactionSetting } from '../../../../../common/aggregated_transactions';
@ -40,8 +48,12 @@ import {
APM_SERVER_FEATURE_ID,
formatTransactionErrorRateReason,
RULE_TYPES_CONFIG,
THRESHOLD_MET_GROUP,
} from '../../../../../common/rules/apm_rule_types';
import { transactionErrorRateParamsSchema } from '../../../../../common/rules/schema';
import {
transactionErrorRateParamsSchema,
ApmRuleParamsType,
} from '../../../../../common/rules/schema';
import { environmentQuery } from '../../../../../common/utils/environment_query';
import { asDecimalOrInteger, getAlertUrlTransaction } from '../../../../../common/utils/formatters';
import { getBackwardCompatibleDocumentTypeFilter } from '../../../../lib/helpers/transactions';
@ -74,6 +86,13 @@ export const transactionErrorRateActionVariables = [
apmActionVariables.viewInAppUrl,
];
type TransactionErrorRateRuleTypeParams = ApmRuleParamsType[ApmRuleType.TransactionErrorRate];
type TransactionErrorRateActionGroups = ActionGroupIdsOf<typeof THRESHOLD_MET_GROUP>;
type TransactionErrorRateRuleTypeState = RuleTypeState;
type TransactionErrorRateAlertState = AlertState;
type TransactionErrorRateAlertContext = AlertContext;
type TransactionErrorRateAlert = ObservabilityApmAlert;
export function registerTransactionErrorRateRuleType({
alerting,
alertsLocator,
@ -83,218 +102,236 @@ export function registerTransactionErrorRateRuleType({
logger,
ruleDataClient,
}: RegisterRuleDependencies) {
const createLifecycleRuleType = createLifecycleRuleTypeFactory({
ruleDataClient,
logger,
});
if (!alerting) {
throw new Error(
'Cannot register the transaction error rate rule type. The alerting plugin need to be enabled.'
);
}
alerting.registerType(
createLifecycleRuleType({
id: ApmRuleType.TransactionErrorRate,
name: ruleTypeConfig.name,
actionGroups: ruleTypeConfig.actionGroups,
defaultActionGroupId: ruleTypeConfig.defaultActionGroupId,
validate: { params: transactionErrorRateParamsSchema },
schemas: {
params: {
type: 'config-schema',
schema: transactionErrorRateParamsSchema,
},
alerting.registerType({
id: ApmRuleType.TransactionErrorRate,
name: ruleTypeConfig.name,
actionGroups: ruleTypeConfig.actionGroups,
defaultActionGroupId: ruleTypeConfig.defaultActionGroupId,
validate: { params: transactionErrorRateParamsSchema },
schemas: {
params: {
type: 'config-schema',
schema: transactionErrorRateParamsSchema,
},
actionVariables: {
context: transactionErrorRateActionVariables,
},
category: DEFAULT_APP_CATEGORIES.observability.id,
producer: APM_SERVER_FEATURE_ID,
minimumLicenseRequired: 'basic',
isExportable: true,
executor: async ({ services, spaceId, params: ruleParams, startedAt, getTimeRange }) => {
const allGroupByFields = getAllGroupByFields(
ApmRuleType.TransactionErrorRate,
ruleParams.groupBy
);
},
actionVariables: {
context: transactionErrorRateActionVariables,
},
category: DEFAULT_APP_CATEGORIES.observability.id,
producer: APM_SERVER_FEATURE_ID,
minimumLicenseRequired: 'basic',
isExportable: true,
executor: async (
options: RuleExecutorOptions<
TransactionErrorRateRuleTypeParams,
TransactionErrorRateRuleTypeState,
TransactionErrorRateAlertState,
TransactionErrorRateAlertContext,
TransactionErrorRateActionGroups,
TransactionErrorRateAlert
>
) => {
const { services, spaceId, params: ruleParams, startedAt, getTimeRange } = options;
const { alertsClient, savedObjectsClient, scopedClusterClient } = services;
if (!alertsClient) {
throw new AlertsClientError();
}
const { getAlertUuid, getAlertStartedDate, savedObjectsClient, scopedClusterClient } =
services;
const allGroupByFields = getAllGroupByFields(
ApmRuleType.TransactionErrorRate,
ruleParams.groupBy
);
const indices = await getApmIndices(savedObjectsClient);
const indices = await getApmIndices(savedObjectsClient);
// only query transaction events when set to 'never',
// to prevent (likely) unnecessary blocking request
// in rule execution
const searchAggregatedTransactions =
apmConfig.searchAggregatedTransactions !== SearchAggregatedTransactionSetting.never;
// only query transaction events when set to 'never',
// to prevent (likely) unnecessary blocking request
// in rule execution
const searchAggregatedTransactions =
apmConfig.searchAggregatedTransactions !== SearchAggregatedTransactionSetting.never;
const index = searchAggregatedTransactions ? indices.metric : indices.transaction;
const index = searchAggregatedTransactions ? indices.metric : indices.transaction;
const termFilterQuery = !ruleParams.searchConfiguration?.query?.query
? [
...termQuery(SERVICE_NAME, ruleParams.serviceName, {
queryEmptyString: false,
}),
...termQuery(TRANSACTION_TYPE, ruleParams.transactionType, {
queryEmptyString: false,
}),
...termQuery(TRANSACTION_NAME, ruleParams.transactionName, {
queryEmptyString: false,
}),
...environmentQuery(ruleParams.environment),
]
: [];
const termFilterQuery = !ruleParams.searchConfiguration?.query?.query
? [
...termQuery(SERVICE_NAME, ruleParams.serviceName, {
queryEmptyString: false,
}),
...termQuery(TRANSACTION_TYPE, ruleParams.transactionType, {
queryEmptyString: false,
}),
...termQuery(TRANSACTION_NAME, ruleParams.transactionName, {
queryEmptyString: false,
}),
...environmentQuery(ruleParams.environment),
]
: [];
const { dateStart } = getTimeRange(`${ruleParams.windowSize}${ruleParams.windowUnit}`);
const { dateStart } = getTimeRange(`${ruleParams.windowSize}${ruleParams.windowUnit}`);
const searchParams = {
index,
body: {
track_total_hits: false,
size: 0,
query: {
bool: {
filter: [
{
range: {
'@timestamp': {
gte: dateStart,
},
const searchParams = {
index,
body: {
track_total_hits: false,
size: 0,
query: {
bool: {
filter: [
{
range: {
'@timestamp': {
gte: dateStart,
},
},
...getBackwardCompatibleDocumentTypeFilter(searchAggregatedTransactions),
{
terms: {
[EVENT_OUTCOME]: [EventOutcome.failure, EventOutcome.success],
},
},
...termFilterQuery,
...getParsedFilterQuery(ruleParams.searchConfiguration?.query?.query as string),
],
},
},
aggs: {
series: {
multi_terms: {
terms: [...getGroupByTerms(allGroupByFields)],
size: 1000,
order: { _count: 'desc' as const },
},
aggs: {
outcomes: {
terms: {
field: EVENT_OUTCOME,
},
aggs: getApmAlertSourceFieldsAgg(),
...getBackwardCompatibleDocumentTypeFilter(searchAggregatedTransactions),
{
terms: {
[EVENT_OUTCOME]: [EventOutcome.failure, EventOutcome.success],
},
},
...termFilterQuery,
...getParsedFilterQuery(ruleParams.searchConfiguration?.query?.query as string),
],
},
},
aggs: {
series: {
multi_terms: {
terms: [...getGroupByTerms(allGroupByFields)],
size: 1000,
order: { _count: 'desc' as const },
},
aggs: {
outcomes: {
terms: {
field: EVENT_OUTCOME,
},
aggs: getApmAlertSourceFieldsAgg(),
},
},
},
},
},
};
const response = await alertingEsClient({
scopedClusterClient,
params: searchParams,
});
if (!response.aggregations) {
return { state: {} };
}
const results = [];
for (const bucket of response.aggregations.series.buckets) {
const groupByFields = bucket.key.reduce((obj, bucketKey, bucketIndex) => {
obj[allGroupByFields[bucketIndex]] = bucketKey;
return obj;
}, {} as Record<string, string>);
const bucketKey = bucket.key;
const failedOutcomeBucket = bucket.outcomes.buckets.find(
(outcomeBucket) => outcomeBucket.key === EventOutcome.failure
);
const failed = failedOutcomeBucket?.doc_count ?? 0;
const succesful =
bucket.outcomes.buckets.find(
(outcomeBucket) => outcomeBucket.key === EventOutcome.success
)?.doc_count ?? 0;
const errorRate = (failed / (failed + succesful)) * 100;
if (errorRate >= ruleParams.threshold) {
results.push({
errorRate,
sourceFields: getApmAlertSourceFields(failedOutcomeBucket),
groupByFields,
bucketKey,
});
}
}
await asyncForEach(results, async (result) => {
const { errorRate, sourceFields, groupByFields, bucketKey } = result;
const alertId = bucketKey.join('_');
const reasonMessage = formatTransactionErrorRateReason({
threshold: ruleParams.threshold,
measured: errorRate,
asPercent,
windowSize: ruleParams.windowSize,
windowUnit: ruleParams.windowUnit,
groupByFields,
});
const { uuid, start } = alertsClient.report({
id: alertId,
actionGroup: ruleTypeConfig.defaultActionGroupId,
});
const indexedStartedAt = start ?? startedAt.toISOString();
const relativeViewInAppUrl = getAlertUrlTransaction(
groupByFields[SERVICE_NAME],
getEnvironmentEsField(groupByFields[SERVICE_ENVIRONMENT])?.[SERVICE_ENVIRONMENT],
groupByFields[TRANSACTION_TYPE]
);
const viewInAppUrl = addSpaceIdToPath(
basePath.publicBaseUrl,
spaceId,
relativeViewInAppUrl
);
const alertDetailsUrl = await getAlertUrl(
uuid,
spaceId,
indexedStartedAt,
alertsLocator,
basePath.publicBaseUrl
);
const groupByActionVariables = getGroupByActionVariables(groupByFields);
const payload = {
[TRANSACTION_NAME]: ruleParams.transactionName,
[PROCESSOR_EVENT]: ProcessorEvent.transaction,
[ALERT_EVALUATION_VALUE]: errorRate,
[ALERT_EVALUATION_THRESHOLD]: ruleParams.threshold,
[ALERT_REASON]: reasonMessage,
...sourceFields,
...groupByFields,
};
const response = await alertingEsClient({
scopedClusterClient,
params: searchParams,
const context = {
alertDetailsUrl,
interval: formatDurationFromTimeUnitChar(
ruleParams.windowSize,
ruleParams.windowUnit as TimeUnitChar
),
reason: reasonMessage,
threshold: ruleParams.threshold,
transactionName: ruleParams.transactionName,
triggerValue: asDecimalOrInteger(errorRate),
viewInAppUrl,
...groupByActionVariables,
};
alertsClient.setAlertData({
id: alertId,
payload,
context,
});
});
if (!response.aggregations) {
return { state: {} };
}
const results = [];
for (const bucket of response.aggregations.series.buckets) {
const groupByFields = bucket.key.reduce((obj, bucketKey, bucketIndex) => {
obj[allGroupByFields[bucketIndex]] = bucketKey;
return obj;
}, {} as Record<string, string>);
const bucketKey = bucket.key;
const failedOutcomeBucket = bucket.outcomes.buckets.find(
(outcomeBucket) => outcomeBucket.key === EventOutcome.failure
);
const failed = failedOutcomeBucket?.doc_count ?? 0;
const succesful =
bucket.outcomes.buckets.find(
(outcomeBucket) => outcomeBucket.key === EventOutcome.success
)?.doc_count ?? 0;
const errorRate = (failed / (failed + succesful)) * 100;
if (errorRate >= ruleParams.threshold) {
results.push({
errorRate,
sourceFields: getApmAlertSourceFields(failedOutcomeBucket),
groupByFields,
bucketKey,
});
}
}
await asyncForEach(results, async (result) => {
const { errorRate, sourceFields, groupByFields, bucketKey } = result;
const alertId = bucketKey.join('_');
const reasonMessage = formatTransactionErrorRateReason({
threshold: ruleParams.threshold,
measured: errorRate,
asPercent,
windowSize: ruleParams.windowSize,
windowUnit: ruleParams.windowUnit,
groupByFields,
});
const alert = services.alertWithLifecycle({
id: alertId,
fields: {
[TRANSACTION_NAME]: ruleParams.transactionName,
[PROCESSOR_EVENT]: ProcessorEvent.transaction,
[ALERT_EVALUATION_VALUE]: errorRate,
[ALERT_EVALUATION_THRESHOLD]: ruleParams.threshold,
[ALERT_REASON]: reasonMessage,
...sourceFields,
...groupByFields,
},
});
const relativeViewInAppUrl = getAlertUrlTransaction(
groupByFields[SERVICE_NAME],
getEnvironmentEsField(groupByFields[SERVICE_ENVIRONMENT])?.[SERVICE_ENVIRONMENT],
groupByFields[TRANSACTION_TYPE]
);
const viewInAppUrl = addSpaceIdToPath(
basePath.publicBaseUrl,
spaceId,
relativeViewInAppUrl
);
const indexedStartedAt = getAlertStartedDate(alertId) ?? startedAt.toISOString();
const alertUuid = getAlertUuid(alertId);
const alertDetailsUrl = await getAlertUrl(
alertUuid,
spaceId,
indexedStartedAt,
alertsLocator,
basePath.publicBaseUrl
);
const groupByActionVariables = getGroupByActionVariables(groupByFields);
alert.scheduleActions(ruleTypeConfig.defaultActionGroupId, {
alertDetailsUrl,
interval: formatDurationFromTimeUnitChar(
ruleParams.windowSize,
ruleParams.windowUnit as TimeUnitChar
),
reason: reasonMessage,
threshold: ruleParams.threshold,
transactionName: ruleParams.transactionName,
triggerValue: asDecimalOrInteger(errorRate),
viewInAppUrl,
...groupByActionVariables,
});
});
return { state: {} };
},
alerts: ApmRuleTypeAlertDefinition,
getViewInAppRelativeUrl: ({ rule }: GetViewInAppRelativeUrlFnOpts<{}>) =>
observabilityPaths.ruleDetails(rule.id),
})
);
return { state: {} };
},
alerts: ApmRuleTypeAlertDefinition,
getViewInAppRelativeUrl: ({ rule }: GetViewInAppRelativeUrlFnOpts<{}>) =>
observabilityPaths.ruleDetails(rule.id),
});
}

View file

@ -1,182 +0,0 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP
exports[`rule registry spaces only: trial Rule Registry API with write permissions when creating a rule writes alerts data to the alert indices 1`] = `
Object {
"event.action": Array [
"open",
],
"event.kind": Array [
"signal",
],
"kibana.alert.consecutive_matches": Array [
1,
],
"kibana.alert.duration.us": Array [
0,
],
"kibana.alert.evaluation.threshold": Array [
30,
],
"kibana.alert.evaluation.value": Array [
50,
],
"kibana.alert.flapping": Array [
false,
],
"kibana.alert.instance.id": Array [
"opbeans-go_ENVIRONMENT_NOT_DEFINED_request",
],
"kibana.alert.reason": Array [
"Failed transactions is 50% in the last 5 mins for service: opbeans-go, env: Not defined, type: request. Alert when > 30%.",
],
"kibana.alert.reason.text": Array [
"Failed transactions is 50% in the last 5 mins for service: opbeans-go, env: Not defined, type: request. Alert when > 30%.",
],
"kibana.alert.rule.category": Array [
"Failed transaction rate threshold",
],
"kibana.alert.rule.consumer": Array [
"apm",
],
"kibana.alert.rule.name": Array [
"Failed transaction rate threshold | opbeans-go",
],
"kibana.alert.rule.parameters": Array [
Object {
"environment": "ENVIRONMENT_ALL",
"serviceName": "opbeans-go",
"threshold": 30,
"transactionType": "request",
"windowSize": 5,
"windowUnit": "m",
},
],
"kibana.alert.rule.producer": Array [
"apm",
],
"kibana.alert.rule.revision": Array [
0,
],
"kibana.alert.rule.rule_type_id": Array [
"apm.transaction_error_rate",
],
"kibana.alert.rule.tags": Array [
"apm",
"service.name:opbeans-go",
],
"kibana.alert.status": Array [
"active",
],
"kibana.alert.workflow_status": Array [
"open",
],
"kibana.space_ids": Array [
"space1",
],
"processor.event": Array [
"transaction",
],
"service.environment": Array [
"ENVIRONMENT_NOT_DEFINED",
],
"service.name": Array [
"opbeans-go",
],
"tags": Array [
"apm",
"service.name:opbeans-go",
],
"transaction.type": Array [
"request",
],
}
`;
exports[`rule registry spaces only: trial Rule Registry API with write permissions when creating a rule writes alerts data to the alert indices 2`] = `
Object {
"event.action": Array [
"close",
],
"event.kind": Array [
"signal",
],
"kibana.alert.consecutive_matches": Array [
0,
],
"kibana.alert.evaluation.threshold": Array [
30,
],
"kibana.alert.evaluation.value": Array [
50,
],
"kibana.alert.flapping": Array [
false,
],
"kibana.alert.instance.id": Array [
"opbeans-go_ENVIRONMENT_NOT_DEFINED_request",
],
"kibana.alert.reason": Array [
"Failed transactions is 50% in the last 5 mins for service: opbeans-go, env: Not defined, type: request. Alert when > 30%.",
],
"kibana.alert.reason.text": Array [
"Failed transactions is 50% in the last 5 mins for service: opbeans-go, env: Not defined, type: request. Alert when > 30%.",
],
"kibana.alert.rule.category": Array [
"Failed transaction rate threshold",
],
"kibana.alert.rule.consumer": Array [
"apm",
],
"kibana.alert.rule.name": Array [
"Failed transaction rate threshold | opbeans-go",
],
"kibana.alert.rule.parameters": Array [
Object {
"environment": "ENVIRONMENT_ALL",
"serviceName": "opbeans-go",
"threshold": 30,
"transactionType": "request",
"windowSize": 5,
"windowUnit": "m",
},
],
"kibana.alert.rule.producer": Array [
"apm",
],
"kibana.alert.rule.revision": Array [
0,
],
"kibana.alert.rule.rule_type_id": Array [
"apm.transaction_error_rate",
],
"kibana.alert.rule.tags": Array [
"apm",
"service.name:opbeans-go",
],
"kibana.alert.status": Array [
"recovered",
],
"kibana.alert.workflow_status": Array [
"open",
],
"kibana.space_ids": Array [
"space1",
],
"processor.event": Array [
"transaction",
],
"service.environment": Array [
"ENVIRONMENT_NOT_DEFINED",
],
"service.name": Array [
"opbeans-go",
],
"tags": Array [
"apm",
"service.name:opbeans-go",
],
"transaction.type": Array [
"request",
],
}
`;

View file

@ -1,318 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import expect from '@kbn/expect';
import {
ALERT_DURATION,
ALERT_END,
ALERT_INSTANCE_ID,
ALERT_RULE_EXECUTION_UUID,
ALERT_RULE_UUID,
ALERT_START,
ALERT_STATUS,
ALERT_TIME_RANGE,
ALERT_UUID,
EVENT_KIND,
VERSION,
} from '@kbn/rule-data-utils';
import { omit } from 'lodash';
import { Rule } from '@kbn/alerting-plugin/common';
import { SerializedConcreteTaskInstance } from '@kbn/task-manager-plugin/server/task';
import type { RuleTaskState } from '@kbn/alerting-state-types';
import type { FtrProviderContext } from '../../../common/ftr_provider_context';
import {
getAlertsTargetIndices,
createApmMetricIndex,
createAlert,
waitUntilNextExecution,
createTransactionMetric,
cleanupTargetIndices,
deleteAlert,
} from '../../../common/lib/helpers';
import { AlertDef, AlertParams } from '../../../common/types';
import { APM_METRIC_INDEX_NAME } from '../../../common/constants';
import { obsOnly } from '../../../common/lib/authentication/users';
import { getEventLog } from '../../../../alerting_api_integration/common/lib/get_event_log';
// Space id handed to every helper call in this suite (rule creation/deletion, execution
// polling, alert index lookup and event-log checks).
const SPACE_ID = 'space1';
/**
 * FTR suite for the `apm.transaction_error_rate` rule type.
 *
 * Creates a rule with a 30% threshold for `opbeans-go`, then checks that:
 *  1. no alert document exists before any data is indexed, nor after a single
 *     successful transaction metric is indexed;
 *  2. after a failed transaction metric is indexed, exactly one alert document is
 *     returned, its UUID matches the event log and task state, and it matches the
 *     first snapshot;
 *  3. after two more successful transaction metrics are indexed, the latest alert
 *     document is `recovered`, has a duration and an end time, and matches the
 *     second snapshot.
 *
 * The `describe`/`it` titles are part of the snapshot keys and must not be changed.
 */
// eslint-disable-next-line import/no-default-export
export default function registryRulesApiTest({ getService }: FtrProviderContext) {
  const es = getService('es');

  /**
   * Asserts that `index` contains no alert ("signal") documents.
   *
   * A search failing with `index_not_found_exception` is accepted as "no alerts";
   * any other search error fails the assertion in the catch block.
   */
  const expectNoAlertDocuments = async (index: string) => {
    let hits: unknown[];
    try {
      const response = await es.search({
        index,
        body: {
          query: {
            term: {
              [EVENT_KIND]: 'signal',
            },
          },
          size: 1,
          sort: {
            '@timestamp': 'desc',
          },
        },
      });
      hits = response.hits.hits;
    } catch (exc) {
      expect(exc.message).contain('index_not_found_exception');
      return;
    }
    // Asserted outside the try block so a non-empty result is reported as such,
    // instead of being caught above and re-reported as a missing
    // 'index_not_found_exception' message.
    expect(hits).to.be.empty();
  };

  /**
   * Fetches the most recent alert ("signal") document from `index` and returns its
   * `fields` map (requested with `field: '*'` and `include_unmapped: true`).
   * Asserts that exactly one hit came back.
   */
  const fetchLatestAlertFields = async (index: string) => {
    const response = await es.search({
      index,
      body: {
        query: {
          term: {
            [EVENT_KIND]: 'signal',
          },
        },
        size: 1,
        sort: {
          '@timestamp': 'desc',
        },
        _source: false,
        fields: [{ field: '*', include_unmapped: true }],
      },
    });
    expect(response.hits.hits.length).to.be(1);
    return response.hits.hits[0].fields as Record<string, any>;
  };

  // Suite callbacks are synchronous: Mocha does not wait on a promise returned from `describe`.
  describe('Rule Registry API', () => {
    describe('with write permissions', () => {
      describe('when creating a rule', () => {
        let createResponse: {
          alert: Rule;
          status: number;
        };

        before(async () => {
          await createApmMetricIndex(getService);
          const alertDef: AlertDef<AlertParams> = {
            params: {
              threshold: 30,
              windowSize: 5,
              windowUnit: 'm',
              transactionType: 'request',
              environment: 'ENVIRONMENT_ALL',
              serviceName: 'opbeans-go',
            },
            consumer: 'apm',
            alertTypeId: 'apm.transaction_error_rate',
            schedule: { interval: '5s' },
            actions: [],
            tags: ['apm', 'service.name:opbeans-go'],
            notifyWhen: 'onActionGroupChange',
            name: 'Failed transaction rate threshold | opbeans-go',
          };
          createResponse = await createAlert(getService, obsOnly, SPACE_ID, alertDef);
        });

        after(async () => {
          await deleteAlert(getService, obsOnly, SPACE_ID, createResponse.alert.id);
          await cleanupTargetIndices(getService, obsOnly, SPACE_ID);
        });

        it('writes alerts data to the alert indices', async () => {
          expect(createResponse.status).to.be.below(299);
          expect(createResponse.alert).not.to.be(undefined);

          let alert = await waitUntilNextExecution(
            getService,
            obsOnly,
            createResponse.alert,
            SPACE_ID
          );

          const { body: targetIndices } = await getAlertsTargetIndices(
            getService,
            obsOnly,
            SPACE_ID
          );
          const alertsIndex = targetIndices[0];

          // Nothing has been indexed yet, so no alert may exist.
          await expectNoAlertDocuments(alertsIndex);

          // A single successful transaction must not trigger the rule.
          await es.index({
            index: APM_METRIC_INDEX_NAME,
            body: createTransactionMetric({
              event: {
                outcome: 'success',
              },
            }),
            refresh: true,
          });
          alert = await waitUntilNextExecution(getService, obsOnly, alert, SPACE_ID);
          await expectNoAlertDocuments(alertsIndex);

          // A failed transaction is expected to push the rule over its threshold.
          await es.index({
            index: APM_METRIC_INDEX_NAME,
            body: createTransactionMetric({
              event: {
                outcome: 'failure',
              },
            }),
            refresh: true,
          });
          alert = await waitUntilNextExecution(getService, obsOnly, alert, SPACE_ID);

          const alertEvent = await fetchLatestAlertFields(alertsIndex);

          // Fields that differ between runs and therefore cannot be snapshotted.
          const exclude = [
            '@timestamp',
            ALERT_START,
            ALERT_UUID,
            ALERT_RULE_EXECUTION_UUID,
            ALERT_RULE_UUID,
            ALERT_TIME_RANGE,
            VERSION,
          ];

          const alertInstanceId = alertEvent[ALERT_INSTANCE_ID]?.[0];
          const alertUuid = alertEvent[ALERT_UUID]?.[0];
          const executionUuid = alertEvent[ALERT_RULE_EXECUTION_UUID]?.[0];
          expect(typeof alertUuid).to.be('string');
          expect(typeof executionUuid).to.be('string');

          await checkEventLogAlertUuids(
            getService,
            SPACE_ID,
            createResponse.alert.id,
            alertInstanceId,
            alertUuid,
            executionUuid
          );

          expectSnapshot(omit(alertEvent, exclude)).toMatch();

          // Two more successful transactions; the alert is expected to recover afterwards.
          await es.bulk({
            index: APM_METRIC_INDEX_NAME,
            body: [
              { index: {} },
              createTransactionMetric({
                event: {
                  outcome: 'success',
                },
              }),
              { index: {} },
              createTransactionMetric({
                event: {
                  outcome: 'success',
                },
              }),
            ],
            refresh: true,
          });
          await waitUntilNextExecution(getService, obsOnly, alert, SPACE_ID);

          const recoveredAlertEvent = await fetchLatestAlertFields(alertsIndex);
          expect(recoveredAlertEvent[ALERT_STATUS]?.[0]).to.eql('recovered');
          expect(recoveredAlertEvent[ALERT_DURATION]?.[0]).to.be.greaterThan(0);
          expect(new Date(recoveredAlertEvent[ALERT_END]?.[0]).getTime()).to.be.greaterThan(0);

          // Duration and end time are also run-dependent, so they are dropped as well.
          expectSnapshot(
            omit(recoveredAlertEvent, exclude.concat([ALERT_DURATION, ALERT_END]))
          ).toMatch();
        });
      });
    });
  });
}
/**
 * Verifies that the alert UUID read from the alerts index is the same UUID recorded
 * in the event log and in the rule's task-manager state.
 *
 * @param getService FTR service accessor; used for the `es` and `retry` services.
 * @param spaceId Space the rule lives in; forwarded to the event-log query.
 * @param ruleId Id of the rule; also used to build the task document id `task:<ruleId>`.
 * @param alertInstanceId Key under which the alert is looked up in the task state.
 * @param alertUuid UUID every event-log doc and the task state entry must carry.
 * @param executionUuid Rule execution UUID used to filter the event-log docs.
 * @throws Error when the task document has no state, or when the alert instance is
 *   present in neither `alertInstances` nor `alertRecoveredInstances`.
 */
async function checkEventLogAlertUuids(
  getService: FtrProviderContext['getService'],
  spaceId: string,
  ruleId: string,
  alertInstanceId: string,
  alertUuid: string,
  executionUuid: string
) {
  const es = getService('es');
  const retry = getService('retry');

  // Event-log query for the 'active-instance' action of this rule, restricted to one execution.
  const getEventLogDocs = () =>
    getEventLog({
      getService,
      spaceId,
      type: 'alert',
      id: ruleId,
      provider: 'alerting',
      actions: new Map([['active-instance', { equal: 1 }]]),
      filter: `kibana.alert.rule.execution.uuid: ${executionUuid}`,
    });

  const docs: Awaited<ReturnType<typeof getEventLog>> = [];
  await retry.waitFor('getting event log docs', async () => {
    docs.push(...(await getEventLogDocs()));
    return docs.length > 0;
  });

  expect(docs.length).to.be.greaterThan(0);
  for (const doc of docs) {
    expect(doc?.kibana?.alert?.uuid).to.be(alertUuid);
  }

  // check that the task doc has the same UUID
  const taskDoc = await es.get<{ task: SerializedConcreteTaskInstance }>({
    index: '.kibana_task_manager',
    id: `task:${ruleId}`,
  });

  const ruleStateString = taskDoc._source?.task.state;
  if (!ruleStateString) {
    // Fail with an explicit message rather than letting JSON.parse choke on a placeholder.
    throw new Error(`task state is missing in task doc for rule ${ruleId}`);
  }
  const ruleState: RuleTaskState = JSON.parse(ruleStateString);

  // The alert may be tracked as active or as recently recovered; active takes precedence.
  const trackedAlert =
    ruleState.alertInstances?.[alertInstanceId] ||
    ruleState.alertRecoveredInstances?.[alertInstanceId];
  if (!trackedAlert) {
    throw new Error('alert instance not found in task doc');
  }
  expect(trackedAlert.meta?.uuid).to.be(alertUuid);
}

View file

@ -22,7 +22,6 @@ export default ({ loadTestFile, getService }: FtrProviderContext): void => {
// Trial
loadTestFile(require.resolve('./get_alert_by_id'));
loadTestFile(require.resolve('./update_alert'));
loadTestFile(require.resolve('./create_rule'));
loadTestFile(require.resolve('./lifecycle_executor'));
});
};