mirror of
https://github.com/elastic/kibana.git
synced 2025-04-24 01:38:56 -04:00
Add error.grouping_name to group alerts in Error Count rule (#161810)
Resolves https://github.com/elastic/actionable-observability/issues/70

For the APM Error Count rule:

- Added `error.grouping_name` to the index mapping of the AAD index
- Added `error.grouping_name` to the alert document in the AAD index
- Added `errorGroupingName` to the list of action variables

I discussed with @simianhacker the possibility of the alert instance ID containing spaces/quotes with the introduction of `errorGroupingName`. It appears that using `errorGroupingName` as is should not be an issue, so we don't need to modify or hash it.

## Group by dropdown
<img width="604" alt="Screenshot 2023-07-13 at 17 27 44" src="d9ab1a8d-3272-4c36-8c71-a7163a024249">

## Reason message
<img width="755" alt="Screenshot 2023-07-13 at 17 38 31" src="dbe4a86b-812b-4068-abea-4b96fa5fb38b">

## Index mapping
<img width="1514" alt="Screenshot 2023-07-13 at 17 40 32" src="f1e48045-a7a8-4044-bc33-f4d34dc1c8cc">

## Alert document
<img width="681" alt="Screenshot 2023-07-13 at 17 39 46" src="985cf003-ac32-4c7e-9f2a-5bda033c194b">

## Action variable
<img width="612" alt="Screenshot 2023-07-13 at 17 43 13" src="2edfb388-f99d-4cae-98ef-3e9b275bb848">

## Alert notification
<img width="650" alt="Screenshot 2023-07-13 at 17 41 37" src="c057a3a1-dc6e-4fee-97ad-5790ab3c531b">
This commit is contained in:
parent
8bb85ae594
commit
73ce87a0a9
12 changed files with 153 additions and 4 deletions
|
@ -75,6 +75,7 @@ const ObservabilityApmAlertOptional = rt.partial({
|
|||
}),
|
||||
error: rt.partial({
|
||||
grouping_key: schemaString,
|
||||
grouping_name: schemaString,
|
||||
}),
|
||||
kibana: rt.partial({
|
||||
alert: rt.partial({
|
||||
|
|
|
@ -82,6 +82,8 @@ Array [
|
|||
|
||||
exports[`Error ERROR_GROUP_ID 1`] = `"grouping key"`;
|
||||
|
||||
exports[`Error ERROR_GROUP_NAME 1`] = `undefined`;
|
||||
|
||||
exports[`Error ERROR_ID 1`] = `"error id"`;
|
||||
|
||||
exports[`Error ERROR_LOG_LEVEL 1`] = `undefined`;
|
||||
|
@ -411,6 +413,8 @@ exports[`Span ERROR_EXCEPTION 1`] = `undefined`;
|
|||
|
||||
exports[`Span ERROR_GROUP_ID 1`] = `undefined`;
|
||||
|
||||
exports[`Span ERROR_GROUP_NAME 1`] = `undefined`;
|
||||
|
||||
exports[`Span ERROR_ID 1`] = `undefined`;
|
||||
|
||||
exports[`Span ERROR_LOG_LEVEL 1`] = `undefined`;
|
||||
|
@ -736,6 +740,8 @@ exports[`Transaction ERROR_EXCEPTION 1`] = `undefined`;
|
|||
|
||||
exports[`Transaction ERROR_GROUP_ID 1`] = `undefined`;
|
||||
|
||||
exports[`Transaction ERROR_GROUP_NAME 1`] = `undefined`;
|
||||
|
||||
exports[`Transaction ERROR_ID 1`] = `undefined`;
|
||||
|
||||
exports[`Transaction ERROR_LOG_LEVEL 1`] = `undefined`;
|
||||
|
|
|
@ -101,6 +101,7 @@ export const PARENT_ID = 'parent.id';
|
|||
|
||||
export const ERROR_ID = 'error.id';
|
||||
export const ERROR_GROUP_ID = 'error.grouping_key';
|
||||
export const ERROR_GROUP_NAME = 'error.grouping_name';
|
||||
export const ERROR_CULPRIT = 'error.culprit';
|
||||
export const ERROR_LOG_LEVEL = 'error.log.level';
|
||||
export const ERROR_LOG_MESSAGE = 'error.log.message';
|
||||
|
|
|
@ -18,6 +18,7 @@ import { ML_ANOMALY_SEVERITY } from '@kbn/ml-anomaly-utils/anomaly_severity';
|
|||
import { ML_ANOMALY_THRESHOLD } from '@kbn/ml-anomaly-utils/anomaly_threshold';
|
||||
import {
|
||||
ERROR_GROUP_ID,
|
||||
ERROR_GROUP_NAME,
|
||||
SERVICE_ENVIRONMENT,
|
||||
SERVICE_NAME,
|
||||
TRANSACTION_NAME,
|
||||
|
@ -61,6 +62,8 @@ const getFieldNameLabel = (field: string): string => {
|
|||
return 'name';
|
||||
case ERROR_GROUP_ID:
|
||||
return 'error key';
|
||||
case ERROR_GROUP_NAME:
|
||||
return 'error name';
|
||||
default:
|
||||
return field;
|
||||
}
|
||||
|
|
|
@ -39,6 +39,7 @@ import {
|
|||
SERVICE_NAME,
|
||||
TRANSACTION_NAME,
|
||||
ERROR_GROUP_ID,
|
||||
ERROR_GROUP_NAME,
|
||||
} from '../../../../../common/es_fields/apm';
|
||||
import {
|
||||
ErrorState,
|
||||
|
@ -218,7 +219,7 @@ export function ErrorCountRuleType(props: Props) {
|
|||
<APMRuleGroupBy
|
||||
onChange={onGroupByChange}
|
||||
options={{ groupBy: ruleParams.groupBy }}
|
||||
fields={[TRANSACTION_NAME, ERROR_GROUP_ID]}
|
||||
fields={[TRANSACTION_NAME, ERROR_GROUP_ID, ERROR_GROUP_NAME]}
|
||||
preSelectedOptions={[SERVICE_NAME, SERVICE_ENVIRONMENT]}
|
||||
/>
|
||||
</EuiFormRow>
|
||||
|
|
|
@ -102,4 +102,13 @@ export const apmActionVariables = {
|
|||
),
|
||||
name: 'errorGroupingKey' as const,
|
||||
},
|
||||
errorGroupingName: {
|
||||
description: i18n.translate(
|
||||
'xpack.apm.alerts.action_variables.errorGroupingName',
|
||||
{
|
||||
defaultMessage: 'The error grouping name the alert is created for',
|
||||
}
|
||||
),
|
||||
name: 'errorGroupingName' as const,
|
||||
},
|
||||
};
|
||||
|
|
|
@ -20,6 +20,7 @@ import { legacyExperimentalFieldMap } from '@kbn/alerts-as-data-utils';
|
|||
import {
|
||||
AGENT_NAME,
|
||||
ERROR_GROUP_ID,
|
||||
ERROR_GROUP_NAME,
|
||||
PROCESSOR_EVENT,
|
||||
SERVICE_ENVIRONMENT,
|
||||
SERVICE_LANGUAGE_NAME,
|
||||
|
@ -57,6 +58,10 @@ export const apmRuleTypeAlertFieldMap = {
|
|||
type: 'keyword',
|
||||
required: false,
|
||||
},
|
||||
[ERROR_GROUP_NAME]: {
|
||||
type: 'keyword',
|
||||
required: false,
|
||||
},
|
||||
[PROCESSOR_EVENT]: {
|
||||
type: 'keyword',
|
||||
required: false,
|
||||
|
|
|
@ -584,4 +584,117 @@ describe('Error count alert', () => {
|
|||
alertDetailsUrl: 'mockedAlertsLocator > getLocation',
|
||||
});
|
||||
});
|
||||
|
||||
it('sends alert when rule is configured with group by on error.grouping_key and error.grouping_name', async () => {
|
||||
const { services, dependencies, executor, scheduleActions } =
|
||||
createRuleTypeMocks();
|
||||
|
||||
registerErrorCountRuleType(dependencies);
|
||||
|
||||
const params = {
|
||||
threshold: 2,
|
||||
windowSize: 5,
|
||||
windowUnit: 'm',
|
||||
groupBy: [
|
||||
'service.name',
|
||||
'service.environment',
|
||||
'error.grouping_key',
|
||||
'error.grouping_name',
|
||||
],
|
||||
};
|
||||
|
||||
services.scopedClusterClient.asCurrentUser.search.mockResponse({
|
||||
hits: {
|
||||
hits: [],
|
||||
total: {
|
||||
relation: 'eq',
|
||||
value: 2,
|
||||
},
|
||||
},
|
||||
aggregations: {
|
||||
error_counts: {
|
||||
buckets: [
|
||||
{
|
||||
key: ['foo', 'env-foo', 'error-key-foo', 'error-name-foo'],
|
||||
doc_count: 5,
|
||||
},
|
||||
{
|
||||
key: ['foo', 'env-foo-2', 'error-key-foo-2', 'error-name-foo2'],
|
||||
doc_count: 4,
|
||||
},
|
||||
{
|
||||
key: ['bar', 'env-bar', 'error-key-bar', 'error-name-bar'],
|
||||
doc_count: 3,
|
||||
},
|
||||
{
|
||||
key: ['bar', 'env-bar-2', 'error-key-bar-2', 'error-name-bar2'],
|
||||
doc_count: 1,
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
took: 0,
|
||||
timed_out: false,
|
||||
_shards: {
|
||||
failed: 0,
|
||||
skipped: 0,
|
||||
successful: 1,
|
||||
total: 1,
|
||||
},
|
||||
});
|
||||
|
||||
await executor({ params });
|
||||
[
|
||||
'foo_env-foo_error-key-foo_error-name-foo',
|
||||
'foo_env-foo-2_error-key-foo-2_error-name-foo2',
|
||||
'bar_env-bar_error-key-bar_error-name-bar',
|
||||
].forEach((instanceName) =>
|
||||
expect(services.alertFactory.create).toHaveBeenCalledWith(instanceName)
|
||||
);
|
||||
|
||||
expect(scheduleActions).toHaveBeenCalledTimes(3);
|
||||
|
||||
expect(scheduleActions).toHaveBeenCalledWith('threshold_met', {
|
||||
serviceName: 'foo',
|
||||
environment: 'env-foo',
|
||||
threshold: 2,
|
||||
triggerValue: 5,
|
||||
reason:
|
||||
'Error count is 5 in the last 5 mins for service: foo, env: env-foo, error key: error-key-foo, error name: error-name-foo. Alert when > 2.',
|
||||
interval: '5 mins',
|
||||
viewInAppUrl:
|
||||
'http://localhost:5601/eyr/app/apm/services/foo/errors?environment=env-foo',
|
||||
errorGroupingKey: 'error-key-foo',
|
||||
errorGroupingName: 'error-name-foo',
|
||||
alertDetailsUrl: 'mockedAlertsLocator > getLocation',
|
||||
});
|
||||
expect(scheduleActions).toHaveBeenCalledWith('threshold_met', {
|
||||
serviceName: 'foo',
|
||||
environment: 'env-foo-2',
|
||||
threshold: 2,
|
||||
triggerValue: 4,
|
||||
reason:
|
||||
'Error count is 4 in the last 5 mins for service: foo, env: env-foo-2, error key: error-key-foo-2, error name: error-name-foo2. Alert when > 2.',
|
||||
interval: '5 mins',
|
||||
viewInAppUrl:
|
||||
'http://localhost:5601/eyr/app/apm/services/foo/errors?environment=env-foo-2',
|
||||
errorGroupingKey: 'error-key-foo-2',
|
||||
errorGroupingName: 'error-name-foo2',
|
||||
alertDetailsUrl: 'mockedAlertsLocator > getLocation',
|
||||
});
|
||||
expect(scheduleActions).toHaveBeenCalledWith('threshold_met', {
|
||||
serviceName: 'bar',
|
||||
environment: 'env-bar',
|
||||
reason:
|
||||
'Error count is 3 in the last 5 mins for service: bar, env: env-bar, error key: error-key-bar, error name: error-name-bar. Alert when > 2.',
|
||||
threshold: 2,
|
||||
triggerValue: 3,
|
||||
interval: '5 mins',
|
||||
viewInAppUrl:
|
||||
'http://localhost:5601/eyr/app/apm/services/bar/errors?environment=env-bar',
|
||||
errorGroupingKey: 'error-key-bar',
|
||||
errorGroupingName: 'error-name-bar',
|
||||
alertDetailsUrl: 'mockedAlertsLocator > getLocation',
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
@ -83,6 +83,7 @@ export function registerErrorCountRuleType({
|
|||
apmActionVariables.serviceName,
|
||||
apmActionVariables.transactionName,
|
||||
apmActionVariables.errorGroupingKey,
|
||||
apmActionVariables.errorGroupingName,
|
||||
apmActionVariables.threshold,
|
||||
apmActionVariables.triggerValue,
|
||||
apmActionVariables.viewInAppUrl,
|
||||
|
|
|
@ -15,11 +15,13 @@ describe('getGroupByActionVariables', () => {
|
|||
'transaction.type': 'request',
|
||||
'transaction.name': 'tx-java',
|
||||
'error.grouping_key': 'error-key-0',
|
||||
'error.grouping_name': 'error-name-0',
|
||||
});
|
||||
expect(result).toMatchInlineSnapshot(`
|
||||
Object {
|
||||
"environment": "development",
|
||||
"errorGroupingKey": "error-key-0",
|
||||
"errorGroupingName": "error-name-0",
|
||||
"serviceName": "opbeans-java",
|
||||
"transactionName": "tx-java",
|
||||
"transactionType": "request",
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
import { getFieldValueLabel } from '../../../../../common/rules/apm_rule_types';
|
||||
import {
|
||||
ERROR_GROUP_ID,
|
||||
ERROR_GROUP_NAME,
|
||||
SERVICE_ENVIRONMENT,
|
||||
SERVICE_NAME,
|
||||
TRANSACTION_NAME,
|
||||
|
@ -26,6 +27,8 @@ const renameActionVariable = (field: string): string => {
|
|||
return 'transactionName';
|
||||
case ERROR_GROUP_ID:
|
||||
return 'errorGroupingKey';
|
||||
case ERROR_GROUP_NAME:
|
||||
return 'errorGroupingName';
|
||||
default:
|
||||
return field;
|
||||
}
|
||||
|
|
|
@ -109,6 +109,7 @@ export default function ApiTest({ getService }: FtrProviderContext) {
|
|||
'service.environment',
|
||||
'transaction.name',
|
||||
'error.grouping_key',
|
||||
'error.grouping_name',
|
||||
],
|
||||
},
|
||||
actions: [
|
||||
|
@ -120,7 +121,8 @@ export default function ApiTest({ getService }: FtrProviderContext) {
|
|||
{
|
||||
message: `${errorCountMessage}
|
||||
- Transaction name: {{context.transactionName}}
|
||||
- Error grouping key: {{context.errorGroupingKey}}`,
|
||||
- Error grouping key: {{context.errorGroupingKey}}
|
||||
- Error grouping name: {{context.errorGroupingName}}`,
|
||||
},
|
||||
],
|
||||
},
|
||||
|
@ -158,6 +160,7 @@ export default function ApiTest({ getService }: FtrProviderContext) {
|
|||
expect(resp.hits.hits[0]._source).property('service.environment', 'production');
|
||||
expect(resp.hits.hits[0]._source).property('transaction.name', 'tx-java');
|
||||
expect(resp.hits.hits[0]._source).property('error.grouping_key', errorGroupingKey);
|
||||
expect(resp.hits.hits[0]._source).property('error.grouping_name', errorMessage);
|
||||
});
|
||||
|
||||
it('returns correct message', async () => {
|
||||
|
@ -168,7 +171,7 @@ export default function ApiTest({ getService }: FtrProviderContext) {
|
|||
});
|
||||
|
||||
expect(resp.hits.hits[0]._source?.message).eql(
|
||||
`Error count is 15 in the last 1 hr for service: opbeans-java, env: production, name: tx-java, error key: ${errorGroupingKey}. Alert when > 1.
|
||||
`Error count is 15 in the last 1 hr for service: opbeans-java, env: production, name: tx-java, error key: ${errorGroupingKey}, error name: ${errorMessage}. Alert when > 1.
|
||||
|
||||
Apm error count is active with the following conditions:
|
||||
|
||||
|
@ -180,7 +183,8 @@ Apm error count is active with the following conditions:
|
|||
[View alert details](http://mockedpublicbaseurl/app/observability/alerts?_a=(kuery:%27kibana.alert.uuid:%20%22${alertId}%22%27%2CrangeFrom:%27${rangeFrom}%27%2CrangeTo:now%2Cstatus:all))
|
||||
|
||||
- Transaction name: tx-java
|
||||
- Error grouping key: ${errorGroupingKey}`
|
||||
- Error grouping key: ${errorGroupingKey}
|
||||
- Error grouping name: ${errorMessage}`
|
||||
);
|
||||
});
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue