[8.7] [APM] adding API test for alert error count threshold (#150764) (#151065)

# Backport

This will backport the following commits from `main` to `8.7`:
- [[APM] adding API test for alert error count threshold
(#150764)](https://github.com/elastic/kibana/pull/150764)

<!--- Backport version: 8.9.7 -->

### Questions?
Please refer to the [Backport tool
documentation](https://github.com/sqren/backport)

<!--BACKPORT [{"author":{"name":"Cauê
Marcondes","email":"55978943+cauemarcondes@users.noreply.github.com"},"sourceCommit":{"committedDate":"2023-02-13T19:30:42Z","message":"[APM]
adding API test for alert error count threshold (#150764)\n\nAdd new api
test for error count
threshold\r\n`tests/alerts/error_count_threshold.spec.ts`\r\n\r\n---------\r\n\r\nCo-authored-by:
kibanamachine
<42973632+kibanamachine@users.noreply.github.com>","sha":"eb4b92d876376d280c41dc352915e5e6e975cc07","branchLabelMapping":{"^v8.8.0$":"main","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["Team:APM","release_note:skip","v8.7.0","v8.8.0"],"number":150764,"url":"https://github.com/elastic/kibana/pull/150764","mergeCommit":{"message":"[APM]
adding API test for alert error count threshold (#150764)\n\nAdd new api
test for error count
threshold\r\n`tests/alerts/error_count_threshold.spec.ts`\r\n\r\n---------\r\n\r\nCo-authored-by:
kibanamachine
<42973632+kibanamachine@users.noreply.github.com>","sha":"eb4b92d876376d280c41dc352915e5e6e975cc07"}},"sourceBranch":"main","suggestedTargetBranches":["8.7"],"targetPullRequestStates":[{"branch":"8.7","label":"v8.7.0","labelRegex":"^v(\\d+).(\\d+).\\d+$","isSourceBranch":false,"state":"NOT_CREATED"},{"branch":"main","label":"v8.8.0","labelRegex":"^v8.8.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/150764","number":150764,"mergeCommit":{"message":"[APM]
adding API test for alert error count threshold (#150764)\n\nAdd new api
test for error count
threshold\r\n`tests/alerts/error_count_threshold.spec.ts`\r\n\r\n---------\r\n\r\nCo-authored-by:
kibanamachine
<42973632+kibanamachine@users.noreply.github.com>","sha":"eb4b92d876376d280c41dc352915e5e6e975cc07"}}]}]
BACKPORT-->

Co-authored-by: Cauê Marcondes <55978943+cauemarcondes@users.noreply.github.com>
This commit is contained in:
Kibana Machine 2023-02-13 15:34:32 -05:00 committed by GitHub
parent a32ba2c886
commit eccd41dca1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
18 changed files with 626 additions and 288 deletions

View file

@ -5,7 +5,7 @@
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
import datemath from '@kbn/datemath';
import type { Moment } from 'moment';
import { Interval } from './interval';
@ -23,12 +23,22 @@ export class Timerange {
type DateLike = Date | number | Moment | string;
function getDateFrom(date: DateLike): Date {
function getDateFrom(date: DateLike, now: Date): Date {
if (date instanceof Date) return date;
if (typeof date === 'string') {
const parsed = datemath.parse(date, { forceNow: now });
if (parsed && parsed.isValid()) {
return parsed.toDate();
}
}
if (typeof date === 'number' || typeof date === 'string') return new Date(date);
return date.toDate();
}
export function timerange(from: Date | number | Moment, to: Date | number | Moment) {
return new Timerange(getDateFrom(from), getDateFrom(to));
export function timerange(from: DateLike, to: DateLike) {
const now = new Date();
return new Timerange(getDateFrom(from, now), getDateFrom(to, now));
}

View file

@ -11,5 +11,8 @@
"include": ["**/*.ts"],
"exclude": [
"target/**/*",
],
"kbn_references": [
"@kbn/datemath",
]
}

View file

@ -0,0 +1,60 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { i18n } from '@kbn/i18n';
/**
 * Translated default action message for the APM error count rule
 * (i18n id `xpack.apm.alertTypes.errorCount.defaultActionMessage`).
 *
 * The template lists the service name, environment, threshold and the
 * triggered error count together with the evaluated interval, using the
 * `alertName` and `context.*` placeholders.
 *
 * NOTE(review): the backslash-escaped braces presumably keep the mustache
 * placeholders literal through i18n formatting — confirm.
 */
export const errorCountMessage = i18n.translate(
'xpack.apm.alertTypes.errorCount.defaultActionMessage',
{
defaultMessage: `\\{\\{alertName\\}\\} alert is firing because of the following conditions:
- Service name: \\{\\{context.serviceName\\}\\}
- Environment: \\{\\{context.environment\\}\\}
- Threshold: \\{\\{context.threshold\\}\\}
- Triggered value: \\{\\{context.triggerValue\\}\\} errors over the last \\{\\{context.interval\\}\\}`,
}
);
/**
 * Translated default action message for the APM transaction duration
 * (latency) rule
 * (i18n id `xpack.apm.alertTypes.transactionDuration.defaultActionMessage`).
 *
 * The template lists the service name, transaction type, environment, the
 * latency threshold (suffixed with `ms`) and the observed latency together
 * with the evaluated interval.
 *
 * NOTE(review): the backslash-escaped braces presumably keep the mustache
 * placeholders literal through i18n formatting — confirm.
 */
export const transactionDurationMessage = i18n.translate(
'xpack.apm.alertTypes.transactionDuration.defaultActionMessage',
{
defaultMessage: `\\{\\{alertName\\}\\} alert is firing because of the following conditions:
- Service name: \\{\\{context.serviceName\\}\\}
- Type: \\{\\{context.transactionType\\}\\}
- Environment: \\{\\{context.environment\\}\\}
- Latency threshold: \\{\\{context.threshold\\}\\}ms
- Latency observed: \\{\\{context.triggerValue\\}\\} over the last \\{\\{context.interval\\}\\}`,
}
);
/**
 * Translated default action message for the APM transaction error rate rule
 * (i18n id `xpack.apm.alertTypes.transactionErrorRate.defaultActionMessage`).
 *
 * The template lists the service name, transaction type, environment, the
 * threshold and the triggered value (both suffixed with `%`) together with
 * the evaluated interval.
 *
 * NOTE(review): the backslash-escaped braces presumably keep the mustache
 * placeholders literal through i18n formatting — confirm.
 */
export const transactionErrorRateMessage = i18n.translate(
'xpack.apm.alertTypes.transactionErrorRate.defaultActionMessage',
{
defaultMessage: `\\{\\{alertName\\}\\} alert is firing because of the following conditions:
- Service name: \\{\\{context.serviceName\\}\\}
- Type: \\{\\{context.transactionType\\}\\}
- Environment: \\{\\{context.environment\\}\\}
- Threshold: \\{\\{context.threshold\\}\\}%
- Triggered value: \\{\\{context.triggerValue\\}\\}% of errors over the last \\{\\{context.interval\\}\\}`,
}
);
/**
 * Translated default action message for the APM anomaly rule
 * (i18n id
 * `xpack.apm.alertTypes.transactionDurationAnomaly.defaultActionMessage`).
 *
 * The template lists the service name, transaction type, environment, the
 * severity threshold and the severity value. Unlike the other default
 * messages in this file it has no interval placeholder and ends with a
 * trailing newline.
 *
 * NOTE(review): the backslash-escaped braces presumably keep the mustache
 * placeholders literal through i18n formatting — confirm.
 */
export const anomalyMessage = i18n.translate(
'xpack.apm.alertTypes.transactionDurationAnomaly.defaultActionMessage',
{
defaultMessage: `\\{\\{alertName\\}\\} alert is firing because of the following conditions:
- Service name: \\{\\{context.serviceName\\}\\}
- Type: \\{\\{context.transactionType\\}\\}
- Environment: \\{\\{context.environment\\}\\}
- Severity threshold: \\{\\{context.threshold\\}\\}
- Severity value: \\{\\{context.triggerValue\\}\\}
`,
}
);

View file

@ -0,0 +1,71 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { schema, TypeOf } from '@kbn/config-schema';
import { ANOMALY_SEVERITY } from '../ml_constants';
import { AggregationType, ApmRuleType } from './apm_rule_types';
/**
 * Validation schema for the parameters of the APM error count rule.
 *
 * `windowSize` and `windowUnit` together describe the evaluation window.
 */
export const errorCountParamsSchema = schema.object({
windowSize: schema.number(),
// Validated only as a string here; no unit whitelist is enforced by this schema.
windowUnit: schema.string(),
threshold: schema.number(),
// Wrapped in `schema.maybe`, so the rule can be created without a service name.
serviceName: schema.maybe(schema.string()),
environment: schema.string(),
});
/**
 * Validation schema for the parameters of the APM transaction duration
 * (latency) rule.
 *
 * `windowSize` and `windowUnit` together describe the evaluation window.
 */
export const transactionDurationParamsSchema = schema.object({
// `serviceName` and `transactionType` are wrapped in `schema.maybe` and may be omitted.
serviceName: schema.maybe(schema.string()),
transactionType: schema.maybe(schema.string()),
windowSize: schema.number(),
// Validated only as a string here; no unit whitelist is enforced by this schema.
windowUnit: schema.string(),
threshold: schema.number(),
// Restricted to the three supported aggregations: average, 95th and 99th percentile.
aggregationType: schema.oneOf([
schema.literal(AggregationType.Avg),
schema.literal(AggregationType.P95),
schema.literal(AggregationType.P99),
]),
environment: schema.string(),
});
/**
 * Validation schema for the parameters of the APM anomaly rule.
 *
 * Unlike the other rule schemas in this file it has no numeric `threshold`;
 * the alerting level is selected through `anomalySeverityType` instead.
 */
export const anomalyParamsSchema = schema.object({
// `serviceName` and `transactionType` are wrapped in `schema.maybe` and may be omitted.
serviceName: schema.maybe(schema.string()),
transactionType: schema.maybe(schema.string()),
windowSize: schema.number(),
// Validated only as a string here; no unit whitelist is enforced by this schema.
windowUnit: schema.string(),
environment: schema.string(),
// Must be exactly one of the four ANOMALY_SEVERITY literals listed below.
anomalySeverityType: schema.oneOf([
schema.literal(ANOMALY_SEVERITY.CRITICAL),
schema.literal(ANOMALY_SEVERITY.MAJOR),
schema.literal(ANOMALY_SEVERITY.MINOR),
schema.literal(ANOMALY_SEVERITY.WARNING),
]),
});
/**
 * Validation schema for the parameters of the APM transaction error rate
 * rule.
 *
 * `windowSize` and `windowUnit` together describe the evaluation window.
 */
export const transactionErrorRateParamsSchema = schema.object({
windowSize: schema.number(),
// Validated only as a string here; no unit whitelist is enforced by this schema.
windowUnit: schema.string(),
threshold: schema.number(),
// `transactionType` and `serviceName` are wrapped in `schema.maybe` and may be omitted.
transactionType: schema.maybe(schema.string()),
serviceName: schema.maybe(schema.string()),
environment: schema.string(),
});
// Static parameter types derived from the rule params schemas with
// config-schema's `TypeOf`, so the types cannot drift from the validation.
// They are module-private and only exposed through `ApmRuleParamsType`.
type ErrorCountParamsType = TypeOf<typeof errorCountParamsSchema>;
type TransactionDurationParamsType = TypeOf<
typeof transactionDurationParamsSchema
>;
type AnomalyParamsType = TypeOf<typeof anomalyParamsSchema>;
type TransactionErrorRateParamsType = TypeOf<
typeof transactionErrorRateParamsSchema
>;
/**
 * Maps each `ApmRuleType` member to the params type inferred from that
 * rule's schema.
 *
 * Indexing it with a rule type (`ApmRuleParamsType[T]`) yields the params
 * shape for that rule, which lets generic helpers type-check `params`
 * against the chosen rule type.
 */
export interface ApmRuleParamsType {
[ApmRuleType.TransactionDuration]: TransactionDurationParamsType;
[ApmRuleType.ErrorCount]: ErrorCountParamsType;
[ApmRuleType.Anomaly]: AnomalyParamsType;
[ApmRuleType.TransactionErrorRate]: TransactionErrorRateParamsType;
}

View file

@ -14,6 +14,12 @@ import {
getAlertUrlTransaction,
} from '../../../../common/utils/formatters';
import { ApmRuleType } from '../../../../common/rules/apm_rule_types';
import {
anomalyMessage,
errorCountMessage,
transactionDurationMessage,
transactionErrorRateMessage,
} from '../../../../common/rules/default_action_message';
// copied from elasticsearch_fieldnames.ts to limit page load bundle size
const SERVICE_ENVIRONMENT = 'service.environment';
@ -54,17 +60,7 @@ export function registerApmRuleTypes(
)
),
requiresAppContext: false,
defaultActionMessage: i18n.translate(
'xpack.apm.alertTypes.errorCount.defaultActionMessage',
{
defaultMessage: `\\{\\{alertName\\}\\} alert is firing because of the following conditions:
- Service name: \\{\\{context.serviceName\\}\\}
- Environment: \\{\\{context.environment\\}\\}
- Threshold: \\{\\{context.threshold\\}\\} errors
- Triggered value: \\{\\{context.triggerValue\\}\\} errors over the last \\{\\{context.interval\\}\\}`,
}
),
defaultActionMessage: errorCountMessage,
});
observabilityRuleTypeRegistry.register({
@ -104,18 +100,7 @@ export function registerApmRuleTypes(
)
),
requiresAppContext: false,
defaultActionMessage: i18n.translate(
'xpack.apm.alertTypes.transactionDuration.defaultActionMessage',
{
defaultMessage: `\\{\\{alertName\\}\\} alert is firing because of the following conditions:
- Service name: \\{\\{context.serviceName\\}\\}
- Type: \\{\\{context.transactionType\\}\\}
- Environment: \\{\\{context.environment\\}\\}
- Latency threshold: \\{\\{context.threshold\\}\\}ms
- Latency observed: \\{\\{context.triggerValue\\}\\} over the last \\{\\{context.interval\\}\\}`,
}
),
defaultActionMessage: transactionDurationMessage,
});
observabilityRuleTypeRegistry.register({
@ -153,18 +138,7 @@ export function registerApmRuleTypes(
)
),
requiresAppContext: false,
defaultActionMessage: i18n.translate(
'xpack.apm.alertTypes.transactionErrorRate.defaultActionMessage',
{
defaultMessage: `\\{\\{alertName\\}\\} alert is firing because of the following conditions:
- Service name: \\{\\{context.serviceName\\}\\}
- Type: \\{\\{context.transactionType\\}\\}
- Environment: \\{\\{context.environment\\}\\}
- Threshold: \\{\\{context.threshold\\}\\}%
- Triggered value: \\{\\{context.triggerValue\\}\\}% of errors over the last \\{\\{context.interval\\}\\}`,
}
),
defaultActionMessage: transactionErrorRateMessage,
});
observabilityRuleTypeRegistry.register({
@ -199,18 +173,6 @@ export function registerApmRuleTypes(
)
),
requiresAppContext: false,
defaultActionMessage: i18n.translate(
'xpack.apm.alertTypes.transactionDurationAnomaly.defaultActionMessage',
{
defaultMessage: `\\{\\{alertName\\}\\} alert is firing because of the following conditions:
- Service name: \\{\\{context.serviceName\\}\\}
- Type: \\{\\{context.transactionType\\}\\}
- Environment: \\{\\{context.environment\\}\\}
- Severity threshold: \\{\\{context.threshold\\}\\}
- Severity value: \\{\\{context.triggerValue\\}\\}
`,
}
),
defaultActionMessage: anomalyMessage,
});
}

View file

@ -5,7 +5,6 @@
* 2.0.
*/
import { QueryDslQueryContainer } from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import { schema } from '@kbn/config-schema';
import { KibanaRequest } from '@kbn/core/server';
import datemath from '@kbn/datemath';
import type { ESSearchResponse } from '@kbn/es-types';
@ -36,7 +35,6 @@ import {
getEnvironmentEsField,
getEnvironmentLabel,
} from '../../../../../common/environment_filter_values';
import { ANOMALY_SEVERITY } from '../../../../../common/ml_constants';
import {
ANOMALY_ALERT_SEVERITY_TYPES,
ApmRuleType,
@ -49,20 +47,7 @@ import { getMLJobs } from '../../../service_map/get_service_anomalies';
import { apmActionVariables } from '../../action_variables';
import { RegisterRuleDependencies } from '../../register_apm_rule_types';
import { getServiceGroupFieldsForAnomaly } from './get_service_group_fields_for_anomaly';
const paramsSchema = schema.object({
serviceName: schema.maybe(schema.string()),
transactionType: schema.maybe(schema.string()),
windowSize: schema.number(),
windowUnit: schema.string(),
environment: schema.string(),
anomalySeverityType: schema.oneOf([
schema.literal(ANOMALY_SEVERITY.CRITICAL),
schema.literal(ANOMALY_SEVERITY.MAJOR),
schema.literal(ANOMALY_SEVERITY.MINOR),
schema.literal(ANOMALY_SEVERITY.WARNING),
]),
});
import { anomalyParamsSchema } from '../../../../../common/rules/schema';
const ruleTypeConfig = RULE_TYPES_CONFIG[ApmRuleType.Anomaly];
@ -86,9 +71,7 @@ export function registerAnomalyRuleType({
name: ruleTypeConfig.name,
actionGroups: ruleTypeConfig.actionGroups,
defaultActionGroupId: ruleTypeConfig.defaultActionGroupId,
validate: {
params: paramsSchema,
},
validate: { params: anomalyParamsSchema },
actionVariables: {
context: [
...(observability.getAlertDetailsConfig()?.apm.enabled

View file

@ -146,7 +146,7 @@ describe('Error count alert', () => {
threshold: 2,
triggerValue: 5,
reason: 'Error count is 5 in the last 5 mins for foo. Alert when > 2.',
interval: '5m',
interval: '5 mins',
viewInAppUrl:
'http://localhost:5601/eyr/app/apm/services/foo/errors?environment=env-foo',
});
@ -159,7 +159,7 @@ describe('Error count alert', () => {
threshold: 2,
triggerValue: 4,
reason: 'Error count is 4 in the last 5 mins for foo. Alert when > 2.',
interval: '5m',
interval: '5 mins',
viewInAppUrl:
'http://localhost:5601/eyr/app/apm/services/foo/errors?environment=env-foo-2',
});
@ -172,7 +172,7 @@ describe('Error count alert', () => {
reason: 'Error count is 3 in the last 5 mins for bar. Alert when > 2.',
threshold: 2,
triggerValue: 3,
interval: '5m',
interval: '5 mins',
viewInAppUrl:
'http://localhost:5601/eyr/app/apm/services/bar/errors?environment=env-bar',
});

View file

@ -5,54 +5,49 @@
* 2.0.
*/
import { schema } from '@kbn/config-schema';
import { firstValueFrom } from 'rxjs';
import { getAlertDetailsUrl } from '@kbn/infra-plugin/server/lib/alerting/common/utils';
import {
formatDurationFromTimeUnitChar,
ProcessorEvent,
TimeUnitChar,
} from '@kbn/observability-plugin/common';
import { termQuery } from '@kbn/observability-plugin/server';
import {
ALERT_EVALUATION_THRESHOLD,
ALERT_EVALUATION_VALUE,
ALERT_REASON,
} from '@kbn/rule-data-utils';
import { createLifecycleRuleTypeFactory } from '@kbn/rule-registry-plugin/server';
import { termQuery } from '@kbn/observability-plugin/server';
import { ProcessorEvent } from '@kbn/observability-plugin/common';
import { getAlertDetailsUrl } from '@kbn/infra-plugin/server/lib/alerting/common/utils';
import { addSpaceIdToPath } from '@kbn/spaces-plugin/common';
import { firstValueFrom } from 'rxjs';
import {
ENVIRONMENT_NOT_DEFINED,
getEnvironmentEsField,
getEnvironmentLabel,
} from '../../../../../common/environment_filter_values';
import { getAlertUrlErrorCount } from '../../../../../common/utils/formatters';
import {
ApmRuleType,
APM_SERVER_FEATURE_ID,
RULE_TYPES_CONFIG,
formatErrorCountReason,
} from '../../../../../common/rules/apm_rule_types';
import {
PROCESSOR_EVENT,
SERVICE_ENVIRONMENT,
SERVICE_NAME,
} from '../../../../../common/es_fields/apm';
import {
ApmRuleType,
APM_SERVER_FEATURE_ID,
formatErrorCountReason,
RULE_TYPES_CONFIG,
} from '../../../../../common/rules/apm_rule_types';
import { errorCountParamsSchema } from '../../../../../common/rules/schema';
import { environmentQuery } from '../../../../../common/utils/environment_query';
import { getAlertUrlErrorCount } from '../../../../../common/utils/formatters';
import { getApmIndices } from '../../../settings/apm_indices/get_apm_indices';
import { apmActionVariables } from '../../action_variables';
import { alertingEsClient } from '../../alerting_es_client';
import { RegisterRuleDependencies } from '../../register_apm_rule_types';
import {
getServiceGroupFieldsAgg,
getServiceGroupFields,
getServiceGroupFieldsAgg,
} from '../get_service_group_fields';
const paramsSchema = schema.object({
windowSize: schema.number(),
windowUnit: schema.string(),
threshold: schema.number(),
serviceName: schema.maybe(schema.string()),
environment: schema.string(),
});
const ruleTypeConfig = RULE_TYPES_CONFIG[ApmRuleType.ErrorCount];
export function registerErrorCountRuleType({
@ -74,9 +69,7 @@ export function registerErrorCountRuleType({
name: ruleTypeConfig.name,
actionGroups: ruleTypeConfig.actionGroups,
defaultActionGroupId: ruleTypeConfig.defaultActionGroupId,
validate: {
params: paramsSchema,
},
validate: { params: errorCountParamsSchema },
actionVariables: {
context: [
...(observability.getAlertDetailsConfig()?.apm.enabled
@ -214,7 +207,10 @@ export function registerErrorCountRuleType({
.scheduleActions(ruleTypeConfig.defaultActionGroupId, {
alertDetailsUrl,
environment: getEnvironmentLabel(environment),
interval: `${ruleParams.windowSize}${ruleParams.windowUnit}`,
interval: formatDurationFromTimeUnitChar(
ruleParams.windowSize,
ruleParams.windowUnit as TimeUnitChar
),
reason: alertReason,
serviceName,
threshold: ruleParams.threshold,

View file

@ -60,7 +60,7 @@ describe('registerTransactionDurationRuleType', () => {
'http://localhost:5601/eyr/app/observability/alerts/'
),
environment: 'Not defined',
interval: `5m`,
interval: `5 mins`,
reason:
'Avg. latency is 5,500 ms in the last 5 mins for opbeans-java. Alert when > 3,000 ms.',
transactionType: 'request',

View file

@ -6,41 +6,46 @@
*/
import { QueryDslQueryContainer } from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import { schema } from '@kbn/config-schema';
import { getAlertDetailsUrl } from '@kbn/infra-plugin/server/lib/alerting/common/utils';
import {
formatDurationFromTimeUnitChar,
ProcessorEvent,
TimeUnitChar,
} from '@kbn/observability-plugin/common';
import { asDuration } from '@kbn/observability-plugin/common/utils/formatters';
import { termQuery } from '@kbn/observability-plugin/server';
import {
ALERT_EVALUATION_THRESHOLD,
ALERT_EVALUATION_VALUE,
ALERT_REASON,
} from '@kbn/rule-data-utils';
import { firstValueFrom } from 'rxjs';
import { asDuration } from '@kbn/observability-plugin/common/utils/formatters';
import { termQuery } from '@kbn/observability-plugin/server';
import { createLifecycleRuleTypeFactory } from '@kbn/rule-registry-plugin/server';
import { ProcessorEvent } from '@kbn/observability-plugin/common';
import { getAlertDetailsUrl } from '@kbn/infra-plugin/server/lib/alerting/common/utils';
import { addSpaceIdToPath } from '@kbn/spaces-plugin/common';
import { getAlertUrlTransaction } from '../../../../../common/utils/formatters';
import { firstValueFrom } from 'rxjs';
import { SearchAggregatedTransactionSetting } from '../../../../../common/aggregated_transactions';
import {
ApmRuleType,
AggregationType,
RULE_TYPES_CONFIG,
APM_SERVER_FEATURE_ID,
formatTransactionDurationReason,
} from '../../../../../common/rules/apm_rule_types';
import {
PROCESSOR_EVENT,
SERVICE_NAME,
TRANSACTION_TYPE,
SERVICE_ENVIRONMENT,
} from '../../../../../common/es_fields/apm';
import {
ENVIRONMENT_NOT_DEFINED,
getEnvironmentEsField,
getEnvironmentLabel,
} from '../../../../../common/environment_filter_values';
import {
PROCESSOR_EVENT,
SERVICE_ENVIRONMENT,
SERVICE_NAME,
TRANSACTION_TYPE,
} from '../../../../../common/es_fields/apm';
import {
ApmRuleType,
APM_SERVER_FEATURE_ID,
formatTransactionDurationReason,
RULE_TYPES_CONFIG,
} from '../../../../../common/rules/apm_rule_types';
import { transactionDurationParamsSchema } from '../../../../../common/rules/schema';
import { environmentQuery } from '../../../../../common/utils/environment_query';
import { getDurationFormatter } from '../../../../../common/utils/formatters';
import {
getAlertUrlTransaction,
getDurationFormatter,
} from '../../../../../common/utils/formatters';
import {
getDocumentTypeFilterForTransactions,
getDurationFieldForTransactions,
@ -49,28 +54,14 @@ import { getApmIndices } from '../../../settings/apm_indices/get_apm_indices';
import { apmActionVariables } from '../../action_variables';
import { alertingEsClient } from '../../alerting_es_client';
import { RegisterRuleDependencies } from '../../register_apm_rule_types';
import {
averageOrPercentileAgg,
getMultiTermsSortOrder,
} from './average_or_percentile_agg';
import {
getServiceGroupFields,
getServiceGroupFieldsAgg,
} from '../get_service_group_fields';
const paramsSchema = schema.object({
serviceName: schema.maybe(schema.string()),
transactionType: schema.maybe(schema.string()),
windowSize: schema.number(),
windowUnit: schema.string(),
threshold: schema.number(),
aggregationType: schema.oneOf([
schema.literal(AggregationType.Avg),
schema.literal(AggregationType.P95),
schema.literal(AggregationType.P99),
]),
environment: schema.string(),
});
import {
averageOrPercentileAgg,
getMultiTermsSortOrder,
} from './average_or_percentile_agg';
const ruleTypeConfig = RULE_TYPES_CONFIG[ApmRuleType.TransactionDuration];
@ -92,9 +83,7 @@ export function registerTransactionDurationRuleType({
name: ruleTypeConfig.name,
actionGroups: ruleTypeConfig.actionGroups,
defaultActionGroupId: ruleTypeConfig.defaultActionGroupId,
validate: {
params: paramsSchema,
},
validate: { params: transactionDurationParamsSchema },
actionVariables: {
context: [
...(observability.getAlertDetailsConfig()?.apm.enabled
@ -289,7 +278,10 @@ export function registerTransactionDurationRuleType({
.scheduleActions(ruleTypeConfig.defaultActionGroupId, {
alertDetailsUrl,
environment: environmentLabel,
interval: `${ruleParams.windowSize}${ruleParams.windowUnit}`,
interval: formatDurationFromTimeUnitChar(
ruleParams.windowSize,
ruleParams.windowUnit as TimeUnitChar
),
reason,
serviceName,
threshold: ruleParams.threshold,

View file

@ -131,7 +131,7 @@ describe('Transaction error rate alert', () => {
'Failed transactions is 10% in the last 5 mins for foo. Alert when > 10%.',
threshold: 10,
triggerValue: '10',
interval: '5m',
interval: '5 mins',
viewInAppUrl:
'http://localhost:5601/eyr/app/apm/services/foo?transactionType=type-foo&environment=env-foo',
});

View file

@ -5,31 +5,28 @@
* 2.0.
*/
import { schema } from '@kbn/config-schema';
import { firstValueFrom } from 'rxjs';
import { getAlertDetailsUrl } from '@kbn/infra-plugin/server/lib/alerting/common/utils';
import {
formatDurationFromTimeUnitChar,
ProcessorEvent,
TimeUnitChar,
} from '@kbn/observability-plugin/common';
import { asPercent } from '@kbn/observability-plugin/common/utils/formatters';
import { termQuery } from '@kbn/observability-plugin/server';
import {
ALERT_EVALUATION_THRESHOLD,
ALERT_EVALUATION_VALUE,
ALERT_REASON,
} from '@kbn/rule-data-utils';
import { createLifecycleRuleTypeFactory } from '@kbn/rule-registry-plugin/server';
import { asPercent } from '@kbn/observability-plugin/common/utils/formatters';
import { termQuery } from '@kbn/observability-plugin/server';
import { addSpaceIdToPath } from '@kbn/spaces-plugin/common';
import { ProcessorEvent } from '@kbn/observability-plugin/common';
import { getAlertDetailsUrl } from '@kbn/infra-plugin/server/lib/alerting/common/utils';
import { firstValueFrom } from 'rxjs';
import { SearchAggregatedTransactionSetting } from '../../../../../common/aggregated_transactions';
import {
ENVIRONMENT_NOT_DEFINED,
getEnvironmentEsField,
getEnvironmentLabel,
} from '../../../../../common/environment_filter_values';
import { getAlertUrlTransaction } from '../../../../../common/utils/formatters';
import {
ApmRuleType,
RULE_TYPES_CONFIG,
APM_SERVER_FEATURE_ID,
formatTransactionErrorRateReason,
} from '../../../../../common/rules/apm_rule_types';
import {
EVENT_OUTCOME,
PROCESSOR_EVENT,
@ -38,28 +35,28 @@ import {
TRANSACTION_TYPE,
} from '../../../../../common/es_fields/apm';
import { EventOutcome } from '../../../../../common/event_outcome';
import { asDecimalOrInteger } from '../../../../../common/utils/formatters';
import {
ApmRuleType,
APM_SERVER_FEATURE_ID,
formatTransactionErrorRateReason,
RULE_TYPES_CONFIG,
} from '../../../../../common/rules/apm_rule_types';
import { transactionErrorRateParamsSchema } from '../../../../../common/rules/schema';
import { environmentQuery } from '../../../../../common/utils/environment_query';
import {
asDecimalOrInteger,
getAlertUrlTransaction,
} from '../../../../../common/utils/formatters';
import { getDocumentTypeFilterForTransactions } from '../../../../lib/helpers/transactions';
import { getApmIndices } from '../../../settings/apm_indices/get_apm_indices';
import { apmActionVariables } from '../../action_variables';
import { alertingEsClient } from '../../alerting_es_client';
import { RegisterRuleDependencies } from '../../register_apm_rule_types';
import { SearchAggregatedTransactionSetting } from '../../../../../common/aggregated_transactions';
import { getDocumentTypeFilterForTransactions } from '../../../../lib/helpers/transactions';
import {
getServiceGroupFields,
getServiceGroupFieldsAgg,
} from '../get_service_group_fields';
const paramsSchema = schema.object({
windowSize: schema.number(),
windowUnit: schema.string(),
threshold: schema.number(),
transactionType: schema.maybe(schema.string()),
serviceName: schema.maybe(schema.string()),
environment: schema.string(),
});
const ruleTypeConfig = RULE_TYPES_CONFIG[ApmRuleType.TransactionErrorRate];
export function registerTransactionErrorRateRuleType({
@ -81,9 +78,7 @@ export function registerTransactionErrorRateRuleType({
name: ruleTypeConfig.name,
actionGroups: ruleTypeConfig.actionGroups,
defaultActionGroupId: ruleTypeConfig.defaultActionGroupId,
validate: {
params: paramsSchema,
},
validate: { params: transactionErrorRateParamsSchema },
actionVariables: {
context: [
...(observability.getAlertDetailsConfig()?.apm.enabled
@ -285,7 +280,10 @@ export function registerTransactionErrorRateRuleType({
.scheduleActions(ruleTypeConfig.defaultActionGroupId, {
alertDetailsUrl,
environment: getEnvironmentLabel(environment),
interval: `${ruleParams.windowSize}${ruleParams.windowUnit}`,
interval: formatDurationFromTimeUnitChar(
ruleParams.windowSize,
ruleParams.windowUnit as TimeUnitChar
),
reason: reasonMessage,
serviceName,
threshold: ruleParams.threshold,

View file

@ -0,0 +1,117 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import type { SuperTest, Test } from 'supertest';
import { ApmRuleType } from '@kbn/apm-plugin/common/rules/apm_rule_types';
import { ApmRuleParamsType } from '@kbn/apm-plugin/common/rules/schema';
import { ApmDocumentType } from '@kbn/apm-plugin/common/document_type';
import { RollupInterval } from '@kbn/apm-plugin/common/rollup';
import { ApmApiClient } from '../../common/config';
/**
 * Creates an `.index` connector by POSTing to `/api/actions/connector`.
 *
 * @param supertest - HTTP client used to issue the request.
 * @param name - Name given to the connector.
 * @param indexName - Index the connector is configured with; `refresh` is
 *   always sent as `true`.
 * @returns The `id` field of the response body. The response status is not
 *   checked, so a failed request is not reported here.
 */
export async function createIndexConnector({
  supertest,
  name,
  indexName,
}: {
  supertest: SuperTest<Test>;
  name: string;
  indexName: string;
}) {
  const connectorPayload = {
    name,
    config: {
      index: indexName,
      refresh: true,
    },
    connector_type_id: '.index',
  };

  const response = await supertest
    .post('/api/actions/connector')
    .set('kbn-xsrf', 'foo')
    .send(connectorPayload);

  return response.body.id as string;
}
/**
 * Creates an APM rule by POSTing to `/api/alerting/rule`.
 *
 * The rule is always created with consumer `apm`, a `1m` schedule interval
 * and the single tag `apm`.
 *
 * @param supertest - HTTP client used to issue the request.
 * @param name - Name given to the rule.
 * @param ruleTypeId - APM rule type; it also selects the expected `params`
 *   shape through `ApmRuleParamsType`.
 * @param params - Rule parameters matching `ruleTypeId`.
 * @param actions - Actions attached to the rule; defaults to none.
 * @returns The response body as-is. The response status is not checked.
 */
export async function createApmRule<T extends ApmRuleType>({
  supertest,
  name,
  ruleTypeId,
  params,
  actions = [],
}: {
  supertest: SuperTest<Test>;
  ruleTypeId: T;
  name: string;
  params: ApmRuleParamsType[T];
  actions?: any[];
}) {
  const rulePayload = {
    params,
    consumer: 'apm',
    schedule: {
      interval: '1m',
    },
    tags: ['apm'],
    name,
    rule_type_id: ruleTypeId,
    actions,
  };

  const response = await supertest
    .post('/api/alerting/rule')
    .set('kbn-xsrf', 'foo')
    .send(rulePayload);

  return response.body;
}
/**
 * Builds the time range used by the alert-count queries in this file.
 *
 * @returns ISO-8601 strings: `start` is 24 hours before the current time
 *   and `end` is 5 minutes after it.
 */
function getTimerange() {
  const dayInMs = 24 * 60 * 60 * 1000;
  const fiveMinutesInMs = 5 * 60 * 1000;
  // Read the clock once so both bounds are offsets from the same instant.
  const now = Date.now();
  return {
    start: new Date(now - dayInMs).toISOString(),
    end: new Date(now + fiveMinutesInMs).toISOString(),
  };
}
/**
 * Fetches the service inventory (`GET /internal/apm/services`) as the read
 * user and returns the alert count of every listed service.
 *
 * The query covers the range from `getTimerange()`, all environments, an
 * empty kuery, and service transaction metrics at the sixty-minute rollup.
 *
 * @param apmApiClient - APM API client used to issue the request.
 * @returns A map of service name to alert count; services with no
 *   `alertsCount` in the response are reported as `0`.
 */
export async function fetchServiceInventoryAlertCounts(
  apmApiClient: ApmApiClient
): Promise<Record<string, number>> {
  const timerange = getTimerange();
  const serviceInventoryResponse = await apmApiClient.readUser({
    endpoint: 'GET /internal/apm/services',
    params: {
      query: {
        ...timerange,
        environment: 'ENVIRONMENT_ALL',
        kuery: '',
        probability: 1,
        documentType: ApmDocumentType.ServiceTransactionMetric,
        rollupInterval: RollupInterval.SixtyMinutes,
      },
    },
  });

  // Build the map in one pass; spreading the accumulator inside a reduce
  // would copy every previously collected entry for each item.
  return Object.fromEntries(
    serviceInventoryResponse.body.items.map((item): [string, number] => [
      item.serviceName,
      item.alertsCount ?? 0,
    ])
  );
}
/**
 * Fetches the alert count of a single service from
 * `GET /internal/apm/services/{serviceName}/alerts_count` as the read user.
 *
 * The query covers the range from `getTimerange()` and all environments.
 *
 * @param apmApiClient - APM API client used to issue the request.
 * @param serviceName - Service whose alerts are counted.
 * @returns The `alertsCount` field of the response body.
 */
export async function fetchServiceTabAlertCount({
  apmApiClient,
  serviceName,
}: {
  apmApiClient: ApmApiClient;
  serviceName: string;
}) {
  const range = getTimerange();
  const { body } = await apmApiClient.readUser({
    endpoint: 'GET /internal/apm/services/{serviceName}/alerts_count',
    params: {
      path: { serviceName },
      query: { ...range, environment: 'ENVIRONMENT_ALL' },
    },
  });

  return body.alertsCount;
}

View file

@ -9,8 +9,10 @@ import { ApmRuleType } from '@kbn/apm-plugin/common/rules/apm_rule_types';
import { apm, timerange } from '@kbn/apm-synthtrace-client';
import expect from '@kbn/expect';
import { range } from 'lodash';
import { ANOMALY_SEVERITY } from '@kbn/apm-plugin/common/ml_constants';
import { FtrProviderContext } from '../../common/ftr_provider_context';
import { createAndRunApmMlJobs } from '../../common/utils/create_and_run_apm_ml_jobs';
import { createApmRule } from './alerting_api_helper';
import { waitForRuleStatus } from './wait_for_rule_status';
export default function ApiTest({ getService }: FtrProviderContext) {
@ -18,7 +20,6 @@ export default function ApiTest({ getService }: FtrProviderContext) {
const supertest = getService('supertest');
const ml = getService('ml');
const log = getService('log');
const es = getService('es');
const synthtraceEsClient = getService('synthtraceEsClient');
@ -81,36 +82,29 @@ export default function ApiTest({ getService }: FtrProviderContext) {
});
it('checks if alert is active', async () => {
const { body: createdRule } = await supertest
.post(`/api/alerting/rule`)
.set('kbn-xsrf', 'foo')
.send({
params: {
environment: 'production',
windowSize: 99,
windowUnit: 'y',
anomalySeverityType: 'warning',
},
consumer: 'apm',
schedule: {
interval: '1m',
},
tags: ['apm', 'service.name:service-a'],
name: 'Latency anomaly | service-a',
rule_type_id: ApmRuleType.Anomaly,
notify_when: 'onActiveAlert',
actions: [],
});
const createdRule = await createApmRule({
supertest,
name: 'Latency anomaly | service-a',
params: {
environment: 'production',
windowSize: 99,
windowUnit: 'y',
anomalySeverityType: ANOMALY_SEVERITY.WARNING,
},
ruleTypeId: ApmRuleType.Anomaly,
});
ruleId = createdRule.id;
const executionStatus = await waitForRuleStatus({
id: ruleId,
expectedStatus: 'active',
supertest,
log,
});
expect(executionStatus.status).to.be('active');
if (!ruleId) {
expect(ruleId).to.not.eql(undefined);
} else {
const executionStatus = await waitForRuleStatus({
id: ruleId,
expectedStatus: 'active',
supertest,
});
expect(executionStatus.status).to.be('active');
}
});
});
}

View file

@ -0,0 +1,165 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { ApmRuleType } from '@kbn/apm-plugin/common/rules/apm_rule_types';
import { errorCountMessage } from '@kbn/apm-plugin/common/rules/default_action_message';
import { apm, timerange } from '@kbn/apm-synthtrace-client';
import expect from '@kbn/expect';
import { FtrProviderContext } from '../../common/ftr_provider_context';
import {
createApmRule,
createIndexConnector,
fetchServiceInventoryAlertCounts,
fetchServiceTabAlertCount,
} from './alerting_api_helper';
import { waitForRuleStatus, waitForDocumentInIndex } from './wait_for_rule_status';
/**
 * API integration suite for the APM error count threshold rule.
 *
 * Setup indexes 15 minutes of synthetic events (one per minute) for two production
 * services: every `opbeans-java` transaction fails and carries an error, while every
 * `opbeans-node` transaction succeeds. An error count rule with an index connector
 * action is then created, and the suite checks the rule execution status, the message
 * written to the connector index, and the alert counts reported per service.
 */
export default function ApiTest({ getService }: FtrProviderContext) {
  const registry = getService('registry');
  const supertest = getService('supertest');
  const es = getService('es');
  const apmApiClient = getService('apmApiClient');
  const esDeleteAllIndices = getService('esDeleteAllIndices');
  const synthtraceEsClient = getService('synthtraceEsClient');

  registry.when('error count threshold alert', { config: 'basic', archives: [] }, () => {
    // Index the connector action writes the rendered alert message into.
    const INDEX_NAME = 'error-count';
    let actionId: string | undefined;
    let ruleId: string;

    before(async () => {
      // Both services share the same environment and instance name.
      const productionInstance = (name: string, agentName: string) =>
        apm.service({ name, environment: 'production', agentName }).instance('instance');

      const javaInstance = productionInstance('opbeans-java', 'java');
      const nodeInstance = productionInstance('opbeans-node', 'node');

      const events = timerange('now-15m', 'now')
        .ratePerMinute(1)
        .generator((timestamp) => {
          const failedJavaTx = javaInstance
            .transaction({ transactionName: 'tx-java' })
            .timestamp(timestamp)
            .duration(100)
            .failure();
          // The error is stamped 50 after the transaction timestamp.
          const javaError = javaInstance
            .error({ message: '[ResponseError] index_not_found_exception' })
            .timestamp(timestamp + 50);
          const successfulNodeTx = nodeInstance
            .transaction({ transactionName: 'tx-node' })
            .timestamp(timestamp)
            .duration(100)
            .success();

          return [failedJavaTx.errors(javaError), successfulNodeTx];
        });

      await synthtraceEsClient.index(events);
    });

    after(async () => {
      // Remove the synthetic data, then the rule, the connector, and the indices they wrote to.
      await synthtraceEsClient.clean();
      await supertest.delete(`/api/alerting/rule/${ruleId}`).set('kbn-xsrf', 'foo');
      await supertest.delete(`/api/actions/connector/${actionId}`).set('kbn-xsrf', 'foo');
      await esDeleteAllIndices(['.alerts*', INDEX_NAME]);
      await es.deleteByQuery({
        index: '.kibana-event-log-*',
        query: { term: { 'kibana.alert.rule.consumer': 'apm' } },
      });
    });

    describe('create alert', () => {
      before(async () => {
        actionId = await createIndexConnector({
          supertest,
          name: 'Error count API test',
          indexName: INDEX_NAME,
        });

        const newRule = await createApmRule({
          supertest,
          ruleTypeId: ApmRuleType.ErrorCount,
          name: 'Apm error count',
          params: {
            environment: 'production',
            threshold: 1,
            windowSize: 1,
            windowUnit: 'h',
          },
          actions: [
            {
              group: 'threshold_met',
              id: actionId,
              params: {
                documents: [{ message: errorCountMessage }],
              },
              frequency: {
                notify_when: 'onActionGroupChange',
                throttle: null,
                summary: false,
              },
            },
          ],
        });

        expect(newRule.id).to.not.eql(undefined);
        ruleId = newRule.id;
      });

      it('checks if alert is active', async () => {
        const { status } = await waitForRuleStatus({
          id: ruleId,
          expectedStatus: 'active',
          supertest,
        });

        expect(status).to.be('active');
      });

      it('returns correct message', async () => {
        const searchResponse = await waitForDocumentInIndex<{ message: string }>({
          es,
          indexName: INDEX_NAME,
        });
        const renderedMessage = searchResponse.hits.hits[0]._source?.message;

        expect(renderedMessage).eql(
          `Apm error count alert is firing because of the following conditions:
- Service name: opbeans-java
- Environment: production
- Threshold: 1
- Triggered value: 15 errors over the last 1 hr`
        );
      });

      it('shows the correct alert count for each service on service inventory', async () => {
        const alertCountsByService = await fetchServiceInventoryAlertCounts(apmApiClient);

        expect(alertCountsByService).to.eql({
          'opbeans-node': 0,
          'opbeans-java': 1,
        });
      });

      it('shows the correct alert count in opbeans-java service', async () => {
        const javaAlertCount = await fetchServiceTabAlertCount({
          apmApiClient,
          serviceName: 'opbeans-java',
        });

        expect(javaAlertCount).to.be(1);
      });

      it('shows the correct alert count in opbeans-node service', async () => {
        const nodeAlertCount = await fetchServiceTabAlertCount({
          apmApiClient,
          serviceName: 'opbeans-node',
        });

        expect(nodeAlertCount).to.be(0);
      });
    });
  });
}

View file

@ -4,53 +4,52 @@
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { ToolingLog } from '@kbn/tooling-log';
import expect from '@kbn/expect';
import type { Client } from '@elastic/elasticsearch';
import type {
AggregationsAggregate,
SearchResponse,
} from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import pRetry from 'p-retry';
import type SuperTest from 'supertest';
const WAIT_FOR_STATUS_INCREMENT = 500;
/**
 * Waits until the rule's `execution_status.status` equals `expectedStatus`.
 *
 * The rule is fetched from `/api/alerting/rule/{id}`; whenever the reported status does
 * not match, the attempt is rejected so that `pRetry` runs it again (up to 10 retries).
 *
 * @param id - Id of the rule to poll. Optional in the type so callers can pass a
 *   possibly-unset variable; a missing id is rejected before any request is made.
 * @param expectedStatus - Execution status to wait for (e.g. `'active'`).
 * @param supertest - HTTP client used to query the alerting API.
 * @param log - Optional logger; when provided, each mismatch is logged at debug level.
 * @param waitMillis - Kept for backward compatibility with older callers. Only a
 *   negative value has an effect: it is rejected as an already-expired wait.
 * @returns The rule's `execution_status` object once the status matches.
 * @throws Error when `id` is missing, when `waitMillis` is negative, or when the status
 *   still does not match after the retries are exhausted.
 */
export async function waitForRuleStatus({
  id,
  expectedStatus,
  waitMillis = 10000,
  supertest,
  log,
}: {
  id?: string;
  expectedStatus: string;
  supertest: SuperTest.SuperTest<SuperTest.Test>;
  log?: ToolingLog;
  waitMillis?: number;
}): Promise<Record<string, any>> {
  if (!id) {
    // Fail fast instead of repeatedly requesting `/api/alerting/rule/undefined`.
    throw new Error(`cannot wait for status ${expectedStatus}: rule id is missing`);
  }
  if (waitMillis < 0) {
    throw new Error(`waiting for alert ${id} status ${expectedStatus} timed out`);
  }

  return pRetry(
    async () => {
      const response = await supertest.get(`/api/alerting/rule/${id}`);
      // Tolerate an absent body or execution status: it is treated as "not matching yet".
      const { execution_status: executionStatus } = response.body || {};
      const { status } = executionStatus || {};

      if (status !== expectedStatus) {
        const message = `waitForStatus(${expectedStatus}): got ${status}`;
        // `log` is optional because several callers do not pass one.
        log?.debug(`${message}, retrying`);
        throw new Error(message);
      }

      return executionStatus;
    },
    { retries: 10 }
  );
}
async function delay(millis: number): Promise<void> {
await new Promise((resolve) => setTimeout(resolve, millis));
/**
 * Searches `indexName` until the search returns at least one hit.
 *
 * An attempt that finds no hits is rejected with `'No hits found'`, which makes
 * `pRetry` run the search again (up to 10 retries).
 *
 * @param es - Elasticsearch client used to run the search.
 * @param indexName - Index to search.
 * @returns The first search response that contains at least one hit.
 */
export async function waitForDocumentInIndex<T>({
  es,
  indexName,
}: {
  es: Client;
  indexName: string;
}): Promise<SearchResponse<T, Record<string, AggregationsAggregate>>> {
  const searchUntilHit = async () => {
    const searchResult = await es.search<T>({ index: indexName });
    const hasHits = searchResult.hits.hits.length > 0;

    if (hasHits) {
      return searchResult;
    }
    throw new Error('No hits found');
  };

  return pRetry(searchUntilHit, { retries: 10 });
}

View file

@ -4,10 +4,11 @@
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { ApmRuleType } from '@kbn/apm-plugin/common/rules/apm_rule_types';
import { AggregationType, ApmRuleType } from '@kbn/apm-plugin/common/rules/apm_rule_types';
import expect from '@kbn/expect';
import { FtrProviderContext } from '../../../common/ftr_provider_context';
import { waitForActiveAlert } from '../../../common/utils/wait_for_active_alert';
import { createApmRule } from '../../alerts/alerting_api_helper';
import {
createServiceGroupApi,
deleteAllServiceGroups,
@ -26,28 +27,21 @@ export default function ApiTest({ getService }: FtrProviderContext) {
const start = Date.now() - 24 * 60 * 60 * 1000;
const end = Date.now();
async function createRule() {
return supertest
.post(`/api/alerting/rule`)
.set('kbn-xsrf', 'true')
.send({
params: {
serviceName: 'synth-go',
transactionType: '',
windowSize: 99,
windowUnit: 'y',
threshold: 100,
aggregationType: 'avg',
environment: 'testing',
},
consumer: 'apm',
schedule: { interval: '1m' },
tags: ['apm'],
name: 'Latency threshold | synth-go',
rule_type_id: ApmRuleType.TransactionDuration,
notify_when: 'onActiveAlert',
actions: [],
});
function createRule() {
return createApmRule({
supertest,
name: 'Latency threshold | synth-go',
params: {
serviceName: 'synth-go',
transactionType: '',
windowSize: 99,
windowUnit: 'y',
threshold: 100,
aggregationType: AggregationType.Avg,
environment: 'testing',
},
ruleTypeId: ApmRuleType.TransactionDuration,
});
}
registry.when('Service group counts', { config: 'basic', archives: [] }, () => {
@ -89,7 +83,7 @@ export default function ApiTest({ getService }: FtrProviderContext) {
describe('with alerts', () => {
let ruleId: string;
before(async () => {
const { body: createdRule } = await createRule();
const createdRule = await createRule();
ruleId = createdRule.id;
await waitForActiveAlert({ ruleId, esClient, log });
});

View file

@ -5,10 +5,11 @@
* 2.0.
*/
import expect from '@kbn/expect';
import { ApmRuleType } from '@kbn/apm-plugin/common/rules/apm_rule_types';
import { AggregationType, ApmRuleType } from '@kbn/apm-plugin/common/rules/apm_rule_types';
import { apm, timerange } from '@kbn/apm-synthtrace-client';
import { FtrProviderContext } from '../../common/ftr_provider_context';
import { waitForActiveAlert } from '../../common/utils/wait_for_active_alert';
import { createApmRule } from '../alerts/alerting_api_helper';
export default function ServiceAlerts({ getService }: FtrProviderContext) {
const registry = getService('registry');
@ -42,28 +43,21 @@ export default function ServiceAlerts({ getService }: FtrProviderContext) {
});
}
async function createRule() {
return supertest
.post(`/api/alerting/rule`)
.set('kbn-xsrf', 'true')
.send({
params: {
serviceName: goService,
transactionType: '',
windowSize: 99,
windowUnit: 'y',
threshold: 100,
aggregationType: 'avg',
environment: 'testing',
},
consumer: 'apm',
schedule: { interval: '1m' },
tags: ['apm'],
name: `Latency threshold | ${goService}`,
rule_type_id: ApmRuleType.TransactionDuration,
notify_when: 'onActiveAlert',
actions: [],
});
function createRule() {
return createApmRule({
supertest,
name: `Latency threshold | ${goService}`,
params: {
serviceName: goService,
transactionType: '',
windowSize: 99,
windowUnit: 'y',
threshold: 100,
aggregationType: AggregationType.Avg,
environment: 'testing',
},
ruleTypeId: ApmRuleType.TransactionDuration,
});
}
registry.when('Service alerts', { config: 'basic', archives: [] }, () => {
@ -121,7 +115,7 @@ export default function ServiceAlerts({ getService }: FtrProviderContext) {
describe('with alerts', () => {
let ruleId: string;
before(async () => {
const { body: createdRule } = await createRule();
const createdRule = await createRule();
ruleId = createdRule.id;
await waitForActiveAlert({ ruleId, esClient, log });
});