[Response Ops] Replace scripted metric aggs in alerting telemetry queries with terms aggregations (#134769)

* Updating getTotalCountAggregations query

* Replacing scripted metric aggs with terms aggregations

* Fixing task manager query

* Updating replaceDotSymbols fn

* Adding stack trace to logger meta

* Reusing event log query

* Adding fallback for bucket key and doc_count

* Switching reduce to a for loop

* Combining aggs

* Fixing nulls issue

Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com>
Ying Mao 2022-06-29 10:24:57 -04:00 committed by GitHub
parent 8440cec9a6
commit 4b7b363e9c
14 changed files with 3267 additions and 1706 deletions
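
The gist of the change, as a minimal before/after sketch (the scripted_metric body is condensed from the deleted implementation below; the terms form matches the new queries, sized by NUM_ALERTING_RULE_TYPES so no rule type bucket is dropped):

// Before: per-rule-type counts were accumulated by hand in Painless
const byRuleTypeIdScripted = {
  scripted_metric: {
    init_script: 'state.ruleTypes = [:]',
    map_script: `
      String ruleType = doc['rule.category'].value;
      state.ruleTypes.put(ruleType, state.ruleTypes.containsKey(ruleType) ? state.ruleTypes.get(ruleType) + 1 : 1);
    `,
    combine_script: 'return state',
    reduce_script: '/* merge the per-shard maps into a single HashMap */',
  },
};

// After: a plain terms aggregation yields the same counts as standard buckets
const byRuleTypeIdTerms = {
  terms: {
    field: 'rule.category',
    size: NUM_ALERTING_RULE_TYPES,
  },
};

No Painless runs per document, and the resulting { key, doc_count } buckets are folded into Record<string, number> maps by the new parse helpers.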


@@ -1,725 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
/* eslint-disable @typescript-eslint/naming-convention */
// eslint-disable-next-line @kbn/eslint/no-restricted-paths
import { elasticsearchClientMock } from '@kbn/core/server/elasticsearch/client/mocks';
import { loggingSystemMock } from '@kbn/core/server/mocks';
import {
getTotalCountAggregations,
getTotalCountInUse,
getExecutionsPerDayCount,
getExecutionTimeoutsPerDayCount,
getFailedAndUnrecognizedTasksPerDay,
parsePercentileAggsByRuleType,
} from './alerting_telemetry';
const mockLogger = loggingSystemMock.create().get();
describe('alerting telemetry', () => {
test('getTotalCountInUse should replace "." symbols with "__" in rule types names', async () => {
const mockEsClient = elasticsearchClientMock.createClusterClient().asScoped().asInternalUser;
mockEsClient.search.mockResponse(
// @ts-expect-error @elastic/elasticsearch Aggregate only allows unknown values
{
aggregations: {
byRuleTypeId: {
value: {
ruleTypes: {
'.index-threshold': 2,
'logs.alert.document.count': 1,
'document.test.': 1,
},
namespaces: {
default: 1,
},
},
},
},
hits: {
hits: [],
},
}
);
const telemetry = await getTotalCountInUse(mockEsClient, 'test', mockLogger);
expect(mockEsClient.search).toHaveBeenCalledTimes(1);
expect(telemetry).toMatchInlineSnapshot(`
Object {
"countByType": Object {
"__index-threshold": 2,
"document__test__": 1,
"logs__alert__document__count": 1,
},
"countNamespaces": 1,
"countTotal": 4,
}
`);
});
test('getTotalCountInUse should return empty results if query throws error', async () => {
const mockEsClient = elasticsearchClientMock.createClusterClient().asScoped().asInternalUser;
mockEsClient.search.mockRejectedValue(new Error('oh no'));
const telemetry = await getTotalCountInUse(mockEsClient, 'test', mockLogger);
expect(mockEsClient.search).toHaveBeenCalledTimes(1);
expect(mockLogger.warn).toHaveBeenCalledWith(
`Error executing alerting telemetry task: getTotalCountInUse - {}`
);
expect(telemetry).toMatchInlineSnapshot(`
Object {
"countByType": Object {},
"countNamespaces": 0,
"countTotal": 0,
}
`);
});
test('getTotalCountAggregations should return min/max connectors in use', async () => {
const mockEsClient = elasticsearchClientMock.createClusterClient().asScoped().asInternalUser;
mockEsClient.search.mockResponse(
// @ts-expect-error @elastic/elasticsearch Aggregate only allows unknown values
{
aggregations: {
byRuleTypeId: {
value: {
ruleTypes: {
'.index-threshold': 2,
'logs.alert.document.count': 1,
'document.test.': 1,
},
},
},
max_throttle_time: { value: 60 },
min_throttle_time: { value: 0 },
avg_throttle_time: { value: 30 },
max_interval_time: { value: 10 },
min_interval_time: { value: 1 },
avg_interval_time: { value: 4.5 },
max_actions_count: { value: 4 },
min_actions_count: { value: 0 },
avg_actions_count: { value: 2.5 },
},
hits: {
hits: [],
},
}
);
const telemetry = await getTotalCountAggregations(mockEsClient, 'test', mockLogger);
expect(mockEsClient.search).toHaveBeenCalledTimes(1);
expect(telemetry).toMatchInlineSnapshot(`
Object {
"connectors_per_alert": Object {
"avg": 2.5,
"max": 4,
"min": 0,
},
"count_by_type": Object {
"__index-threshold": 2,
"document__test__": 1,
"logs__alert__document__count": 1,
},
"count_rules_namespaces": 0,
"count_total": 4,
"schedule_time": Object {
"avg": "4.5s",
"max": "10s",
"min": "1s",
},
"schedule_time_number_s": Object {
"avg": 4.5,
"max": 10,
"min": 1,
},
"throttle_time": Object {
"avg": "30s",
"max": "60s",
"min": "0s",
},
"throttle_time_number_s": Object {
"avg": 30,
"max": 60,
"min": 0,
},
}
`);
});
test('getTotalCountAggregations should return empty results if query throws error', async () => {
const mockEsClient = elasticsearchClientMock.createClusterClient().asScoped().asInternalUser;
mockEsClient.search.mockRejectedValue(new Error('oh no'));
const telemetry = await getTotalCountAggregations(mockEsClient, 'test', mockLogger);
expect(mockEsClient.search).toHaveBeenCalledTimes(1);
expect(mockLogger.warn).toHaveBeenCalledWith(
`Error executing alerting telemetry task: getTotalCountAggregations - {}`
);
expect(telemetry).toMatchInlineSnapshot(`
Object {
"connectors_per_alert": Object {
"avg": 0,
"max": 0,
"min": 0,
},
"count_by_type": Object {},
"count_rules_namespaces": 0,
"count_total": 0,
"schedule_time": Object {
"avg": "0s",
"max": "0s",
"min": "0s",
},
"schedule_time_number_s": Object {
"avg": 0,
"max": 0,
"min": 0,
},
"throttle_time": Object {
"avg": "0s",
"max": "0s",
"min": "0s",
},
"throttle_time_number_s": Object {
"avg": 0,
"max": 0,
"min": 0,
},
}
`);
});
test('getExecutionsPerDayCount should return execution aggregations for total count, count by rule type and number of failed executions', async () => {
const mockEsClient = elasticsearchClientMock.createClusterClient().asScoped().asInternalUser;
mockEsClient.search.mockResponse(
// @ts-expect-error @elastic/elasticsearch Aggregate only allows unknown values
{
aggregations: {
byRuleTypeId: {
value: {
ruleTypes: {
'.index-threshold': 2,
'logs.alert.document.count': 1,
'document.test.': 1,
},
ruleTypesDuration: {
'.index-threshold': 2087868,
'logs.alert.document.count': 1675765,
'document.test.': 17687687,
},
ruleTypesEsSearchDuration: {
'.index-threshold': 23,
'logs.alert.document.count': 526,
'document.test.': 534,
},
ruleTypesTotalSearchDuration: {
'.index-threshold': 62,
'logs.alert.document.count': 588,
'document.test.': 637,
},
},
},
failuresByReason: {
value: {
reasons: {
unknown: {
'.index-threshold': 2,
'logs.alert.document.count': 1,
'document.test.': 1,
},
},
},
},
avgDuration: { value: 10 },
avgEsSearchDuration: {
value: 25.785714285714285,
},
avgTotalSearchDuration: {
value: 30.642857142857142,
},
percentileScheduledActions: {
values: {
'50.0': 4.0,
'90.0': 26.0,
'99.0': 26.0,
},
},
percentileAlerts: {
values: {
'50.0': 10.0,
'90.0': 22.0,
'99.0': 22.0,
},
},
aggsByType: {
doc_count_error_upper_bound: 0,
sum_other_doc_count: 0,
buckets: [
{
key: '.index-threshold',
doc_count: 149,
percentileScheduledActions: {
values: {
'50.0': 4.0,
'90.0': 26.0,
'99.0': 26.0,
},
},
percentileAlerts: {
values: {
'50.0': 10.0,
'90.0': 22.0,
'99.0': 22.0,
},
},
},
{
key: 'logs.alert.document.count',
doc_count: 1,
percentileScheduledActions: {
values: {
'50.0': 10.0,
'90.0': 10.0,
'99.0': 10.0,
},
},
percentileAlerts: {
values: {
'50.0': 5.0,
'90.0': 13.0,
'99.0': 13.0,
},
},
},
],
},
},
hits: {
hits: [],
},
}
);
const telemetry = await getExecutionsPerDayCount(mockEsClient, 'test', mockLogger);
expect(mockEsClient.search).toHaveBeenCalledTimes(1);
expect(telemetry).toStrictEqual({
avgEsSearchDuration: 26,
avgEsSearchDurationByType: {
'__index-threshold': 12,
document__test__: 534,
logs__alert__document__count: 526,
},
avgExecutionTime: 0,
avgExecutionTimeByType: {
'__index-threshold': 1043934,
document__test__: 17687687,
logs__alert__document__count: 1675765,
},
avgTotalSearchDuration: 31,
avgTotalSearchDurationByType: {
'__index-threshold': 31,
document__test__: 637,
logs__alert__document__count: 588,
},
countByType: {
'__index-threshold': 2,
document__test__: 1,
logs__alert__document__count: 1,
},
countFailuresByReason: {
unknown: 4,
},
countFailuresByReasonByType: {
unknown: {
'__index-threshold': 2,
document__test__: 1,
logs__alert__document__count: 1,
},
},
countTotal: 4,
countTotalFailures: 4,
generatedActionsPercentiles: {
p50: 4,
p90: 26,
p99: 26,
},
generatedActionsPercentilesByType: {
p50: {
'__index-threshold': 4,
logs__alert__document__count: 10,
},
p90: {
'__index-threshold': 26,
logs__alert__document__count: 10,
},
p99: {
'__index-threshold': 26,
logs__alert__document__count: 10,
},
},
alertsPercentiles: {
p50: 10,
p90: 22,
p99: 22,
},
alertsPercentilesByType: {
p50: {
'__index-threshold': 10,
logs__alert__document__count: 5,
},
p90: {
'__index-threshold': 22,
logs__alert__document__count: 13,
},
p99: {
'__index-threshold': 22,
logs__alert__document__count: 13,
},
},
});
});
test('getExecutionsPerDayCount should return empty results if query throws error', async () => {
const mockEsClient = elasticsearchClientMock.createClusterClient().asScoped().asInternalUser;
mockEsClient.search.mockRejectedValue(new Error('oh no'));
const telemetry = await getExecutionsPerDayCount(mockEsClient, 'test', mockLogger);
expect(mockEsClient.search).toHaveBeenCalledTimes(1);
expect(mockLogger.warn).toHaveBeenCalledWith(
`Error executing alerting telemetry task: getExecutionsPerDayCount - {}`
);
expect(telemetry).toStrictEqual({
avgEsSearchDuration: 0,
avgEsSearchDurationByType: {},
avgExecutionTime: 0,
avgExecutionTimeByType: {},
avgTotalSearchDuration: 0,
avgTotalSearchDurationByType: {},
countByType: {},
countFailuresByReason: {},
countFailuresByReasonByType: {},
countTotal: 0,
countTotalFailures: 0,
generatedActionsPercentiles: {},
generatedActionsPercentilesByType: {},
alertsPercentiles: {},
alertsPercentilesByType: {},
});
});
test('getExecutionTimeoutsPerDayCount should return execution aggregations for total timeout count and count by rule type', async () => {
const mockEsClient = elasticsearchClientMock.createClusterClient().asScoped().asInternalUser;
mockEsClient.search.mockResponse(
// @ts-expect-error @elastic/elasticsearch Aggregate only allows unknown values
{
aggregations: {
byRuleTypeId: {
value: {
ruleTypes: {
'.index-threshold': 2,
'logs.alert.document.count': 1,
'document.test.': 1,
},
},
},
},
hits: {
hits: [],
},
}
);
const telemetry = await getExecutionTimeoutsPerDayCount(mockEsClient, 'test', mockLogger);
expect(mockEsClient.search).toHaveBeenCalledTimes(1);
expect(telemetry).toStrictEqual({
countTotal: 4,
countByType: {
'__index-threshold': 2,
document__test__: 1,
logs__alert__document__count: 1,
},
});
});
test('getExecutionTimeoutsPerDayCount should return empty results if query throws error', async () => {
const mockEsClient = elasticsearchClientMock.createClusterClient().asScoped().asInternalUser;
mockEsClient.search.mockRejectedValue(new Error('oh no'));
const telemetry = await getExecutionTimeoutsPerDayCount(mockEsClient, 'test', mockLogger);
expect(mockEsClient.search).toHaveBeenCalledTimes(1);
expect(mockLogger.warn).toHaveBeenCalledWith(
`Error executing alerting telemetry task: getExecutionsPerDayCount - {}`
);
expect(telemetry).toStrictEqual({
countTotal: 0,
countByType: {},
});
});
test('getFailedAndUnrecognizedTasksPerDay should return aggregations for total count, count by status, and count by status and rule type for failed and unrecognized tasks', async () => {
const mockEsClient = elasticsearchClientMock.createClusterClient().asScoped().asInternalUser;
mockEsClient.search.mockResponse(
// @ts-expect-error @elastic/elasticsearch Aggregate only allows unknown values
{
aggregations: {
byTaskTypeId: {
value: {
statuses: {
failed: {
'.index-threshold': 2,
'logs.alert.document.count': 1,
'document.test.': 1,
},
unrecognized: {
'o.l.d.task-type': 1,
},
},
},
},
},
hits: {
hits: [],
},
}
);
const telemetry = await getFailedAndUnrecognizedTasksPerDay(mockEsClient, 'test', mockLogger);
expect(mockEsClient.search).toHaveBeenCalledTimes(1);
expect(telemetry).toStrictEqual({
countByStatus: {
failed: 4,
unrecognized: 1,
},
countByStatusByRuleType: {
failed: {
'__index-threshold': 2,
document__test__: 1,
logs__alert__document__count: 1,
},
unrecognized: {
'o__l__d__task-type': 1,
},
},
countTotal: 5,
});
});
test('getFailedAndUnrecognizedTasksPerDay should return empty results if query throws error', async () => {
const mockEsClient = elasticsearchClientMock.createClusterClient().asScoped().asInternalUser;
mockEsClient.search.mockRejectedValue(new Error('oh no'));
const telemetry = await getFailedAndUnrecognizedTasksPerDay(mockEsClient, 'test', mockLogger);
expect(mockEsClient.search).toHaveBeenCalledTimes(1);
expect(mockLogger.warn).toHaveBeenCalledWith(
`Error executing alerting telemetry task: getFailedAndUnrecognizedTasksPerDay - {}`
);
expect(telemetry).toStrictEqual({
countByStatus: {},
countByStatusByRuleType: {},
countTotal: 0,
});
});
test('parsePercentileAggsByRuleType', () => {
const aggsByType = {
doc_count_error_upper_bound: 0,
sum_other_doc_count: 0,
buckets: [
{
key: '.index-threshold',
doc_count: 149,
percentileScheduledActions: {
values: {
'50.0': 4.0,
'90.0': 26.0,
'99.0': 26.0,
},
},
percentileAlerts: {
values: {
'50.0': 3.0,
'90.0': 22.0,
'99.0': 22.0,
},
},
},
{
key: 'logs.alert.document.count',
doc_count: 1,
percentileScheduledActions: {
values: {
'50.0': 10.0,
'90.0': 10.0,
'99.0': 10.0,
},
},
percentileAlerts: {
values: {
'50.0': 5.0,
'90.0': 16.0,
'99.0': 16.0,
},
},
},
{
key: 'document.test.',
doc_count: 1,
percentileScheduledActions: {
values: {
'50.0': null,
'90.0': null,
'99.0': null,
},
},
percentileAlerts: {
values: {
'50.0': null,
'90.0': null,
'99.0': null,
},
},
},
],
};
expect(
parsePercentileAggsByRuleType(aggsByType.buckets, 'percentileScheduledActions.values')
).toEqual({
p50: {
'__index-threshold': 4,
document__test__: 0,
logs__alert__document__count: 10,
},
p90: {
'__index-threshold': 26,
document__test__: 0,
logs__alert__document__count: 10,
},
p99: {
'__index-threshold': 26,
document__test__: 0,
logs__alert__document__count: 10,
},
});
expect(parsePercentileAggsByRuleType(aggsByType.buckets, 'percentileAlerts.values')).toEqual({
p50: {
'__index-threshold': 3,
document__test__: 0,
logs__alert__document__count: 5,
},
p90: {
'__index-threshold': 22,
document__test__: 0,
logs__alert__document__count: 16,
},
p99: {
'__index-threshold': 22,
document__test__: 0,
logs__alert__document__count: 16,
},
});
});
test('parsePercentileAggsByRuleType handles unknown path', () => {
const aggsByType = {
doc_count_error_upper_bound: 0,
sum_other_doc_count: 0,
buckets: [
{
key: '.index-threshold',
doc_count: 149,
percentileScheduledActions: {
values: {
'50.0': 4.0,
'90.0': 26.0,
'99.0': 26.0,
},
},
},
{
key: 'logs.alert.document.count',
doc_count: 1,
percentileScheduledActions: {
values: {
'50.0': 10.0,
'90.0': 10.0,
'99.0': 10.0,
},
},
},
],
};
expect(parsePercentileAggsByRuleType(aggsByType.buckets, 'foo.values')).toEqual({
p50: {},
p90: {},
p99: {},
});
});
test('parsePercentileAggsByRuleType handles unrecognized percentiles', () => {
const aggsByType = {
doc_count_error_upper_bound: 0,
sum_other_doc_count: 0,
buckets: [
{
key: '.index-threshold',
doc_count: 149,
percentileScheduledActions: {
values: {
'50.0': 4.0,
'75.0': 8.0,
'90.0': 26.0,
'99.0': 26.0,
},
},
},
{
key: 'logs.alert.document.count',
doc_count: 1,
percentileScheduledActions: {
values: {
'50.0': 10.0,
'75.0': 10.0,
'90.0': 10.0,
'99.0': 10.0,
},
},
},
],
};
expect(
parsePercentileAggsByRuleType(aggsByType.buckets, 'percentileScheduledActions.values')
).toEqual({
p50: {
'__index-threshold': 4,
logs__alert__document__count: 10,
},
p90: {
'__index-threshold': 26,
logs__alert__document__count: 10,
},
p99: {
'__index-threshold': 26,
logs__alert__document__count: 10,
},
});
});
});
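
A recurring assertion in these tests is the dot-to-double-underscore rewrite of rule type ids. A minimal usage sketch of the helper (it appears verbatim near the end of the deleted implementation below; that dotted keys would otherwise be read as nested field paths in the telemetry mapping is my assumption, not stated in the diff):

function replaceDotSymbols(strToReplace: string) {
  return strToReplace.replaceAll('.', '__');
}

replaceDotSymbols('.index-threshold');          // '__index-threshold'
replaceDotSymbols('logs.alert.document.count'); // 'logs__alert__document__count'
replaceDotSymbols('document.test.');            // 'document__test__'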


@@ -1,962 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import { ElasticsearchClient, Logger } from '@kbn/core/server';
import { get, merge } from 'lodash';
import { AlertingUsage } from './types';
import { NUM_ALERTING_RULE_TYPES } from './alerting_usage_collector';
const percentileFieldNameMapping: Record<string, string> = {
'50.0': 'p50',
'90.0': 'p90',
'99.0': 'p99',
};
const ruleTypeMetric = {
scripted_metric: {
init_script: 'state.ruleTypes = [:]; state.namespaces = [:]',
map_script: `
String ruleType = doc['alert.alertTypeId'].value;
String namespace = doc['namespaces'] !== null && doc['namespaces'].size() > 0 ? doc['namespaces'].value : 'default';
state.ruleTypes.put(ruleType, state.ruleTypes.containsKey(ruleType) ? state.ruleTypes.get(ruleType) + 1 : 1);
if (state.namespaces.containsKey(namespace) === false) {
state.namespaces.put(namespace, 1);
}
`,
// Combine script is executed once per shard, but we already have a key-value pair per shard.
// Despite docs that say this is optional, this script can't be blank.
combine_script: 'return state',
// Reduce script is executed on the coordinating node across all shards, so we need to add up the totals from each shard
// This also needs to account for having no data
reduce_script: `
HashMap result = new HashMap();
HashMap combinedRuleTypes = new HashMap();
HashMap combinedNamespaces = new HashMap();
for (state in states) {
for (String ruleType : state.ruleTypes.keySet()) {
int ruleTypeCount = combinedRuleTypes.containsKey(ruleType) ? combinedRuleTypes.get(ruleType) + state.ruleTypes.get(ruleType) : state.ruleTypes.get(ruleType);
combinedRuleTypes.put(ruleType, ruleTypeCount);
}
for (String namespace : state.namespaces.keySet()) {
combinedNamespaces.put(namespace, 1);
}
}
result.ruleTypes = combinedRuleTypes;
result.namespaces = combinedNamespaces;
return result;
`,
},
};
const generatedActionsPercentilesAgg = {
percentiles: {
field: 'kibana.alert.rule.execution.metrics.number_of_generated_actions',
percents: [50, 90, 99],
},
};
const alertsPercentilesAgg = {
percentiles: {
field: 'kibana.alert.rule.execution.metrics.alert_counts.active',
percents: [50, 90, 99],
},
};
const ruleTypeExecutionsWithDurationMetric = {
scripted_metric: {
init_script:
'state.ruleTypes = [:]; state.ruleTypesDuration = [:]; state.ruleTypesEsSearchDuration = [:]; state.ruleTypesTotalSearchDuration = [:];',
map_script: `
String ruleType = doc['rule.category'].value;
long duration = doc['event.duration'].value / (1000 * 1000);
long esSearchDuration = doc['kibana.alert.rule.execution.metrics.es_search_duration_ms'].empty ? 0 : doc['kibana.alert.rule.execution.metrics.es_search_duration_ms'].value;
long totalSearchDuration = doc['kibana.alert.rule.execution.metrics.total_search_duration_ms'].empty ? 0 : doc['kibana.alert.rule.execution.metrics.total_search_duration_ms'].value;
state.ruleTypes.put(ruleType, state.ruleTypes.containsKey(ruleType) ? state.ruleTypes.get(ruleType) + 1 : 1);
state.ruleTypesDuration.put(ruleType, state.ruleTypesDuration.containsKey(ruleType) ? state.ruleTypesDuration.get(ruleType) + duration : duration);
state.ruleTypesEsSearchDuration.put(ruleType, state.ruleTypesEsSearchDuration.containsKey(ruleType) ? state.ruleTypesEsSearchDuration.get(ruleType) + esSearchDuration : esSearchDuration);
state.ruleTypesTotalSearchDuration.put(ruleType, state.ruleTypesTotalSearchDuration.containsKey(ruleType) ? state.ruleTypesTotalSearchDuration.get(ruleType) + totalSearchDuration : totalSearchDuration);
`,
// Combine script is executed once per shard, but we already have a key-value pair per shard.
// Despite docs that say this is optional, this script can't be blank.
combine_script: 'return state',
// Reduce script is executed on the coordinating node across all shards, so we need to add up the totals from each shard
// This also needs to account for having no data
reduce_script: `
HashMap result = new HashMap();
HashMap combinedRuleTypes = new HashMap();
HashMap combinedRuleTypeDurations = new HashMap();
HashMap combinedRuleTypeEsSearchDurations = new HashMap();
HashMap combinedRuleTypeTotalSearchDurations = new HashMap();
for (state in states) {
for (String ruleType : state.ruleTypes.keySet()) {
int ruleTypeCount = combinedRuleTypes.containsKey(ruleType) ? combinedRuleTypes.get(ruleType) + state.ruleTypes.get(ruleType) : state.ruleTypes.get(ruleType);
combinedRuleTypes.put(ruleType, ruleTypeCount);
}
for (String ruleType : state.ruleTypesDuration.keySet()) {
long ruleTypeDurationTotal = combinedRuleTypeDurations.containsKey(ruleType) ? combinedRuleTypeDurations.get(ruleType) + state.ruleTypesDuration.get(ruleType) : state.ruleTypesDuration.get(ruleType);
combinedRuleTypeDurations.put(ruleType, ruleTypeDurationTotal);
}
for (String ruleType : state.ruleTypesEsSearchDuration.keySet()) {
long ruleTypeEsSearchDurationTotal = combinedRuleTypeEsSearchDurations.containsKey(ruleType) ? combinedRuleTypeEsSearchDurations.get(ruleType) + state.ruleTypesEsSearchDuration.get(ruleType) : state.ruleTypesEsSearchDuration.get(ruleType);
combinedRuleTypeEsSearchDurations.put(ruleType, ruleTypeEsSearchDurationTotal);
}
for (String ruleType : state.ruleTypesTotalSearchDuration.keySet()) {
long ruleTypeTotalSearchDurationTotal = combinedRuleTypeTotalSearchDurations.containsKey(ruleType) ? combinedRuleTypeTotalSearchDurations.get(ruleType) + state.ruleTypesTotalSearchDuration.get(ruleType) : state.ruleTypesTotalSearchDuration.get(ruleType);
combinedRuleTypeTotalSearchDurations.put(ruleType, ruleTypeTotalSearchDurationTotal);
}
}
result.ruleTypes = combinedRuleTypes;
result.ruleTypesDuration = combinedRuleTypeDurations;
result.ruleTypesEsSearchDuration = combinedRuleTypeEsSearchDurations;
result.ruleTypesTotalSearchDuration = combinedRuleTypeTotalSearchDurations;
return result;
`,
},
};
const ruleTypeExecutionsMetric = {
scripted_metric: {
init_script: 'state.ruleTypes = [:]',
map_script: `
String ruleType = doc['rule.category'].value;
state.ruleTypes.put(ruleType, state.ruleTypes.containsKey(ruleType) ? state.ruleTypes.get(ruleType) + 1 : 1);
`,
// Combine script is executed once per shard, but we already have a key-value pair per shard.
// Despite docs that say this is optional, this script can't be blank.
combine_script: 'return state',
// Reduce script is executed on the coordinating node across all shards, so we need to add up the totals from each shard
// This also needs to account for having no data
reduce_script: `
HashMap result = new HashMap();
HashMap combinedRuleTypes = new HashMap();
for (state in states) {
for (String ruleType : state.ruleTypes.keySet()) {
int ruleTypeCount = combinedRuleTypes.containsKey(ruleType) ? combinedRuleTypes.get(ruleType) + state.ruleTypes.get(ruleType) : state.ruleTypes.get(ruleType);
combinedRuleTypes.put(ruleType, ruleTypeCount);
}
}
result.ruleTypes = combinedRuleTypes;
return result;
`,
},
};
const taskTypeExecutionsMetric = {
scripted_metric: {
init_script: 'state.statuses = [:]',
map_script: `
String status = doc['task.status'].value;
String taskType = doc['task.taskType'].value.replace('alerting:', '');
Map taskTypes = state.statuses.containsKey(status) ? state.statuses.get(status) : [:];
taskTypes.put(taskType, taskTypes.containsKey(taskType) ? taskTypes.get(taskType) + 1 : 1);
state.statuses.put(status, taskTypes);
`,
// Combine script is executed once per shard, but we already have a key-value pair per shard.
// Despite docs that say this is optional, this script can't be blank.
combine_script: 'return state',
// Reduce script is executed on the coordinating node across all shards, so we need to add up the totals from each shard
// This also needs to account for having no data
reduce_script: `
HashMap result = new HashMap();
HashMap combinedStatuses = new HashMap();
for (state in states) {
for (String status : state.statuses.keySet()) {
HashMap combinedTaskTypes = new HashMap();
Map statusTaskTypes = state.statuses.get(status);
for (String taskType : statusTaskTypes.keySet()) {
int statusByTaskTypeCount = combinedTaskTypes.containsKey(taskType) ? combinedTaskTypes.get(taskType) + statusTaskTypes.get(taskType) : statusTaskTypes.get(taskType);
combinedTaskTypes.put(taskType, statusByTaskTypeCount);
}
combinedStatuses.put(status, combinedTaskTypes);
}
}
result.statuses = combinedStatuses;
return result;
`,
},
};
const ruleTypeFailureExecutionsMetric = {
scripted_metric: {
init_script: 'state.reasons = [:]',
map_script: `
if (doc['event.outcome'].value == 'failure') {
String reason = doc['event.reason'].value;
String ruleType = doc['rule.category'].value;
Map ruleTypes = state.reasons.containsKey(reason) ? state.reasons.get(reason) : [:];
ruleTypes.put(ruleType, ruleTypes.containsKey(ruleType) ? ruleTypes.get(ruleType) + 1 : 1);
state.reasons.put(reason, ruleTypes);
}
`,
// Combine script is executed once per shard, but we already have a key-value pair per shard.
// Despite docs that say this is optional, this script can't be blank.
combine_script: 'return state',
// Reduce script is executed on the coordinating node across all shards, so we need to add up the totals from each shard
// This also needs to account for having no data
reduce_script: `
HashMap result = new HashMap();
HashMap combinedReasons = new HashMap();
for (state in states) {
for (String reason : state.reasons.keySet()) {
HashMap combinedRuleTypes = new HashMap();
Map reasonRuleTypes = state.reasons.get(reason);
for (String ruleType : state.reasons.get(reason).keySet()) {
int reasonByRuleTypeCount = combinedRuleTypes.containsKey(ruleType) ? combinedRuleTypes.get(ruleType) + reasonRuleTypes.get(ruleType) : reasonRuleTypes.get(ruleType);
combinedRuleTypes.put(ruleType, reasonByRuleTypeCount);
}
combinedReasons.put(reason, combinedRuleTypes);
}
}
result.reasons = combinedReasons;
return result;
`,
},
};
export async function getTotalCountAggregations(
esClient: ElasticsearchClient,
kibanaIndex: string,
logger: Logger
): Promise<
Pick<
AlertingUsage,
| 'count_total'
| 'count_by_type'
| 'throttle_time'
| 'schedule_time'
| 'throttle_time_number_s'
| 'schedule_time_number_s'
| 'connectors_per_alert'
| 'count_rules_namespaces'
>
> {
try {
const results = await esClient.search({
index: kibanaIndex,
body: {
size: 0,
query: {
bool: {
filter: [{ term: { type: 'alert' } }],
},
},
runtime_mappings: {
alert_action_count: {
type: 'long',
script: {
source: `
def alert = params._source['alert'];
if (alert != null) {
def actions = alert.actions;
if (actions != null) {
emit(actions.length);
} else {
emit(0);
}
}`,
},
},
alert_interval: {
type: 'long',
script: {
source: `
int parsed = 0;
if (doc['alert.schedule.interval'].size() > 0) {
def interval = doc['alert.schedule.interval'].value;
if (interval.length() > 1) {
// get last char
String timeChar = interval.substring(interval.length() - 1);
// remove last char
interval = interval.substring(0, interval.length() - 1);
if (interval.chars().allMatch(Character::isDigit)) {
// using regex is not allowed in the Painless language
parsed = Integer.parseInt(interval);
if (timeChar.equals("s")) {
parsed = parsed;
} else if (timeChar.equals("m")) {
parsed = parsed * 60;
} else if (timeChar.equals("h")) {
parsed = parsed * 60 * 60;
} else if (timeChar.equals("d")) {
parsed = parsed * 24 * 60 * 60;
}
emit(parsed);
}
}
}
emit(parsed);
`,
},
},
alert_throttle: {
type: 'long',
script: {
source: `
int parsed = 0;
if (doc['alert.throttle'].size() > 0) {
def throttle = doc['alert.throttle'].value;
if (throttle.length() > 1) {
// get last char
String timeChar = throttle.substring(throttle.length() - 1);
// remove last char
throttle = throttle.substring(0, throttle.length() - 1);
if (throttle.chars().allMatch(Character::isDigit)) {
// using regex is not allowed in the Painless language
parsed = Integer.parseInt(throttle);
if (timeChar.equals("s")) {
parsed = parsed;
} else if (timeChar.equals("m")) {
parsed = parsed * 60;
} else if (timeChar.equals("h")) {
parsed = parsed * 60 * 60;
} else if (timeChar.equals("d")) {
parsed = parsed * 24 * 60 * 60;
}
emit(parsed);
}
}
}
emit(parsed);
`,
},
},
},
aggs: {
byRuleTypeId: ruleTypeMetric,
max_throttle_time: { max: { field: 'alert_throttle' } },
min_throttle_time: { min: { field: 'alert_throttle' } },
avg_throttle_time: { avg: { field: 'alert_throttle' } },
max_interval_time: { max: { field: 'alert_interval' } },
min_interval_time: { min: { field: 'alert_interval' } },
avg_interval_time: { avg: { field: 'alert_interval' } },
max_actions_count: { max: { field: 'alert_action_count' } },
min_actions_count: { min: { field: 'alert_action_count' } },
avg_actions_count: { avg: { field: 'alert_action_count' } },
},
},
});
const aggregations = results.aggregations as {
byRuleTypeId: { value: { ruleTypes: Record<string, string> } };
max_throttle_time: { value: number };
min_throttle_time: { value: number };
avg_throttle_time: { value: number };
max_interval_time: { value: number };
min_interval_time: { value: number };
avg_interval_time: { value: number };
max_actions_count: { value: number };
min_actions_count: { value: number };
avg_actions_count: { value: number };
};
const totalRulesCount = Object.keys(aggregations.byRuleTypeId.value.ruleTypes).reduce(
(total: number, key: string) =>
parseInt(aggregations.byRuleTypeId.value.ruleTypes[key], 10) + total,
0
);
return {
count_total: totalRulesCount,
count_by_type: replaceDotSymbolsInRuleTypeIds(aggregations.byRuleTypeId.value.ruleTypes),
throttle_time: {
min: `${aggregations.min_throttle_time.value}s`,
avg: `${aggregations.avg_throttle_time.value}s`,
max: `${aggregations.max_throttle_time.value}s`,
},
schedule_time: {
min: `${aggregations.min_interval_time.value}s`,
avg: `${aggregations.avg_interval_time.value}s`,
max: `${aggregations.max_interval_time.value}s`,
},
throttle_time_number_s: {
min: aggregations.min_throttle_time.value,
avg: aggregations.avg_throttle_time.value,
max: aggregations.max_throttle_time.value,
},
schedule_time_number_s: {
min: aggregations.min_interval_time.value,
avg: aggregations.avg_interval_time.value,
max: aggregations.max_interval_time.value,
},
connectors_per_alert: {
min: aggregations.min_actions_count.value,
avg: aggregations.avg_actions_count.value,
max: aggregations.max_actions_count.value,
},
count_rules_namespaces: 0,
};
} catch (err) {
logger.warn(
`Error executing alerting telemetry task: getTotalCountAggregations - ${JSON.stringify(err)}`
);
return {
count_total: 0,
count_by_type: {},
throttle_time: {
min: '0s',
avg: '0s',
max: '0s',
},
schedule_time: {
min: '0s',
avg: '0s',
max: '0s',
},
throttle_time_number_s: {
min: 0,
avg: 0,
max: 0,
},
schedule_time_number_s: {
min: 0,
avg: 0,
max: 0,
},
connectors_per_alert: {
min: 0,
avg: 0,
max: 0,
},
count_rules_namespaces: 0,
};
}
}
export async function getTotalCountInUse(
esClient: ElasticsearchClient,
kibanaIndex: string,
logger: Logger
) {
try {
const searchResult = await esClient.search({
index: kibanaIndex,
size: 0,
body: {
query: {
bool: {
filter: [{ term: { type: 'alert' } }, { term: { 'alert.enabled': true } }],
},
},
aggs: {
byRuleTypeId: ruleTypeMetric,
},
},
});
const aggregations = searchResult.aggregations as {
byRuleTypeId: {
value: { ruleTypes: Record<string, string>; namespaces: Record<string, string> };
};
};
return {
countTotal: Object.keys(aggregations.byRuleTypeId.value.ruleTypes).reduce(
(total: number, key: string) =>
parseInt(aggregations.byRuleTypeId.value.ruleTypes[key], 10) + total,
0
),
countByType: replaceDotSymbolsInRuleTypeIds(aggregations.byRuleTypeId.value.ruleTypes),
countNamespaces: Object.keys(aggregations.byRuleTypeId.value.namespaces).length,
};
} catch (err) {
logger.warn(
`Error executing alerting telemetry task: getTotalCountInUse - ${JSON.stringify(err)}`
);
return {
countTotal: 0,
countByType: {},
countNamespaces: 0,
};
}
}
export async function getExecutionsPerDayCount(
esClient: ElasticsearchClient,
eventLogIndex: string,
logger: Logger
) {
try {
const searchResult = await esClient.search({
index: eventLogIndex,
size: 0,
body: {
query: {
bool: {
filter: {
bool: {
must: [
{
term: { 'event.action': 'execute' },
},
{
term: { 'event.provider': 'alerting' },
},
{
range: {
'@timestamp': {
gte: 'now-1d',
},
},
},
],
},
},
},
},
aggs: {
byRuleTypeId: ruleTypeExecutionsWithDurationMetric,
failuresByReason: ruleTypeFailureExecutionsMetric,
avgDuration: { avg: { field: 'event.duration' } },
avgEsSearchDuration: {
avg: { field: 'kibana.alert.rule.execution.metrics.es_search_duration_ms' },
},
avgTotalSearchDuration: {
avg: { field: 'kibana.alert.rule.execution.metrics.total_search_duration_ms' },
},
percentileScheduledActions: generatedActionsPercentilesAgg,
percentileAlerts: alertsPercentilesAgg,
aggsByType: {
terms: {
field: 'rule.category',
size: NUM_ALERTING_RULE_TYPES,
},
aggs: {
percentileScheduledActions: generatedActionsPercentilesAgg,
percentileAlerts: alertsPercentilesAgg,
},
},
},
},
});
const executionsAggregations = searchResult.aggregations as {
byRuleTypeId: {
value: {
ruleTypes: Record<string, string>;
ruleTypesDuration: Record<string, number>;
ruleTypesEsSearchDuration: Record<string, number>;
ruleTypesTotalSearchDuration: Record<string, number>;
};
};
};
const aggsAvgExecutionTime = Math.round(
// @ts-expect-error aggregation type is not specified
// convert nanoseconds to milliseconds
searchResult.aggregations.avgDuration.value / (1000 * 1000)
);
const aggsAvgEsSearchDuration = Math.round(
// @ts-expect-error aggregation type is not specified
searchResult.aggregations.avgEsSearchDuration.value
);
const aggsAvgTotalSearchDuration = Math.round(
// @ts-expect-error aggregation type is not specified
searchResult.aggregations.avgTotalSearchDuration.value
);
const aggsGeneratedActionsPercentiles =
// @ts-expect-error aggregation type is not specified
searchResult.aggregations.percentileScheduledActions.values;
const aggsAlertsPercentiles =
// @ts-expect-error aggregation type is not specified
searchResult.aggregations.percentileAlerts.values;
const aggsByTypeBuckets =
// @ts-expect-error aggregation type is not specified
searchResult.aggregations.aggsByType.buckets;
const executionFailuresAggregations = searchResult.aggregations as {
failuresByReason: { value: { reasons: Record<string, Record<string, string>> } };
};
return {
countTotal: Object.keys(executionsAggregations.byRuleTypeId.value.ruleTypes).reduce(
(total: number, key: string) =>
parseInt(executionsAggregations.byRuleTypeId.value.ruleTypes[key], 10) + total,
0
),
countByType: replaceDotSymbolsInRuleTypeIds(
executionsAggregations.byRuleTypeId.value.ruleTypes
),
countTotalFailures: Object.keys(
executionFailuresAggregations.failuresByReason.value.reasons
).reduce((total: number, reason: string) => {
const byRuleTypesRefs =
executionFailuresAggregations.failuresByReason.value.reasons[reason];
const countByRuleTypes = Object.keys(byRuleTypesRefs).reduce(
(totalByType, ruleType) => parseInt(byRuleTypesRefs[ruleType] + totalByType, 10),
0
);
return countByRuleTypes + total;
}, 0),
countFailuresByReason: Object.keys(
executionFailuresAggregations.failuresByReason.value.reasons
).reduce(
// ES DSL aggregations are returned as `any` by esClient.search
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(obj: any, reason: string) => {
const byRuleTypesRefs =
executionFailuresAggregations.failuresByReason.value.reasons[reason];
const countByRuleTypes = Object.keys(byRuleTypesRefs).reduce(
(totalByType, ruleType) => parseInt(byRuleTypesRefs[ruleType] + totalByType, 10),
0
);
return {
...obj,
[replaceDotSymbols(reason)]: countByRuleTypes,
};
},
{}
),
countFailuresByReasonByType: Object.keys(
executionFailuresAggregations.failuresByReason.value.reasons
).reduce(
// ES DSL aggregations are returned as `any` by esClient.search
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(obj: any, key: string) => ({
...obj,
[key]: replaceDotSymbolsInRuleTypeIds(
executionFailuresAggregations.failuresByReason.value.reasons[key]
),
}),
{}
),
avgExecutionTime: aggsAvgExecutionTime,
avgExecutionTimeByType: Object.keys(
executionsAggregations.byRuleTypeId.value.ruleTypes
).reduce(
// ES DSL aggregations are returned as `any` by esClient.search
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(obj: any, key: string) => ({
...obj,
[replaceDotSymbols(key)]: Math.round(
executionsAggregations.byRuleTypeId.value.ruleTypesDuration[key] /
parseInt(executionsAggregations.byRuleTypeId.value.ruleTypes[key], 10)
),
}),
{}
),
avgEsSearchDuration: aggsAvgEsSearchDuration,
avgEsSearchDurationByType: Object.keys(
executionsAggregations.byRuleTypeId.value.ruleTypes
).reduce(
// ES DSL aggregations are returned as `any` by esClient.search
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(obj: any, key: string) => ({
...obj,
[replaceDotSymbols(key)]: Math.round(
executionsAggregations.byRuleTypeId.value.ruleTypesEsSearchDuration[key] /
parseInt(executionsAggregations.byRuleTypeId.value.ruleTypes[key], 10)
),
}),
{}
),
avgTotalSearchDuration: aggsAvgTotalSearchDuration,
avgTotalSearchDurationByType: Object.keys(
executionsAggregations.byRuleTypeId.value.ruleTypes
).reduce(
// ES DSL aggregations are returned as `any` by esClient.search
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(obj: any, key: string) => ({
...obj,
[replaceDotSymbols(key)]: Math.round(
executionsAggregations.byRuleTypeId.value.ruleTypesTotalSearchDuration[key] /
parseInt(executionsAggregations.byRuleTypeId.value.ruleTypes[key], 10)
),
}),
{}
),
generatedActionsPercentiles: Object.keys(aggsGeneratedActionsPercentiles).reduce(
// ES DSL aggregations are returned as `any` by esClient.search
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(acc: any, curr: string) => ({
...acc,
...(percentileFieldNameMapping[curr]
? { [percentileFieldNameMapping[curr]]: aggsGeneratedActionsPercentiles[curr] }
: {}),
}),
{}
),
generatedActionsPercentilesByType: parsePercentileAggsByRuleType(
aggsByTypeBuckets,
'percentileScheduledActions.values'
),
alertsPercentiles: Object.keys(aggsAlertsPercentiles).reduce(
// ES DSL aggregations are returned as `any` by esClient.search
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(acc: any, curr: string) => ({
...acc,
...(percentileFieldNameMapping[curr]
? { [percentileFieldNameMapping[curr]]: aggsAlertsPercentiles[curr] }
: {}),
}),
{}
),
alertsPercentilesByType: parsePercentileAggsByRuleType(
aggsByTypeBuckets,
'percentileAlerts.values'
),
};
} catch (err) {
logger.warn(
`Error executing alerting telemetry task: getExecutionsPerDayCount - ${JSON.stringify(err)}`
);
return {
countTotal: 0,
countByType: {},
countTotalFailures: 0,
countFailuresByReason: {},
countFailuresByReasonByType: {},
avgExecutionTime: 0,
avgExecutionTimeByType: {},
avgEsSearchDuration: 0,
avgEsSearchDurationByType: {},
avgTotalSearchDuration: 0,
avgTotalSearchDurationByType: {},
generatedActionsPercentiles: {},
generatedActionsPercentilesByType: {},
alertsPercentiles: {},
alertsPercentilesByType: {},
};
}
}
export async function getExecutionTimeoutsPerDayCount(
esClient: ElasticsearchClient,
eventLogIndex: string,
logger: Logger
) {
try {
const searchResult = await esClient.search({
index: eventLogIndex,
size: 0,
body: {
query: {
bool: {
filter: {
bool: {
must: [
{
term: { 'event.action': 'execute-timeout' },
},
{
term: { 'event.provider': 'alerting' },
},
{
range: {
'@timestamp': {
gte: 'now-1d',
},
},
},
],
},
},
},
},
aggs: {
byRuleTypeId: ruleTypeExecutionsMetric,
},
},
});
const executionsAggregations = searchResult.aggregations as {
byRuleTypeId: {
value: { ruleTypes: Record<string, string>; ruleTypesDuration: Record<string, number> };
};
};
return {
countTotal: Object.keys(executionsAggregations.byRuleTypeId.value.ruleTypes).reduce(
(total: number, key: string) =>
parseInt(executionsAggregations.byRuleTypeId.value.ruleTypes[key], 10) + total,
0
),
countByType: replaceDotSymbolsInRuleTypeIds(
executionsAggregations.byRuleTypeId.value.ruleTypes
),
};
} catch (err) {
logger.warn(
`Error executing alerting telemetry task: getExecutionsTimeoutsPerDayCount - ${JSON.stringify(
err
)}`
);
return {
countTotal: 0,
countByType: {},
};
}
}
export async function getFailedAndUnrecognizedTasksPerDay(
esClient: ElasticsearchClient,
taskManagerIndex: string,
logger: Logger
) {
try {
const searchResult = await esClient.search({
index: taskManagerIndex,
size: 0,
body: {
query: {
bool: {
must: [
{
bool: {
should: [
{
term: {
'task.status': 'unrecognized',
},
},
{
term: {
'task.status': 'failed',
},
},
],
},
},
{
wildcard: {
'task.taskType': {
value: 'alerting:*',
},
},
},
{
range: {
'task.runAt': {
gte: 'now-1d',
},
},
},
],
},
},
aggs: {
byTaskTypeId: taskTypeExecutionsMetric,
},
},
});
const executionsAggregations = searchResult.aggregations as {
byTaskTypeId: { value: { statuses: Record<string, Record<string, string>> } };
};
return {
countTotal: Object.keys(executionsAggregations.byTaskTypeId.value.statuses).reduce(
(total: number, status: string) => {
const byRuleTypesRefs = executionsAggregations.byTaskTypeId.value.statuses[status];
const countByRuleTypes = Object.keys(byRuleTypesRefs).reduce(
(totalByType, ruleType) => parseInt(byRuleTypesRefs[ruleType] + totalByType, 10),
0
);
return countByRuleTypes + total;
},
0
),
countByStatus: Object.keys(executionsAggregations.byTaskTypeId.value.statuses).reduce(
// ES DSL aggregations are returned as `any` by esClient.search
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(obj: any, status: string) => {
const byRuleTypesRefs = executionsAggregations.byTaskTypeId.value.statuses[status];
const countByRuleTypes = Object.keys(byRuleTypesRefs).reduce(
(totalByType, ruleType) => parseInt(byRuleTypesRefs[ruleType] + totalByType, 10),
0
);
return {
...obj,
[status]: countByRuleTypes,
};
},
{}
),
countByStatusByRuleType: Object.keys(
executionsAggregations.byTaskTypeId.value.statuses
).reduce(
// ES DSL aggregations are returned as `any` by esClient.search
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(obj: any, key: string) => ({
...obj,
[key]: replaceDotSymbolsInRuleTypeIds(
executionsAggregations.byTaskTypeId.value.statuses[key]
),
}),
{}
),
};
} catch (err) {
logger.warn(
`Error executing alerting telemetry task: getFailedAndUnrecognizedTasksPerDay - ${JSON.stringify(
err
)}`
);
return {
countTotal: 0,
countByStatus: {},
countByStatusByRuleType: {},
};
}
}
function replaceDotSymbols(strToReplace: string) {
return strToReplace.replaceAll('.', '__');
}
function replaceDotSymbolsInRuleTypeIds(ruleTypeIdObj: Record<string, string>) {
return Object.keys(ruleTypeIdObj).reduce(
(obj, key) => ({ ...obj, [replaceDotSymbols(key)]: ruleTypeIdObj[key] }),
{}
);
}
export function parsePercentileAggsByRuleType(
aggsByType: estypes.AggregationsStringTermsBucketKeys[],
path: string
) {
return (aggsByType ?? []).reduce(
(acc, curr) => {
const percentiles = get(curr, path, {});
return merge(
acc,
Object.keys(percentiles).reduce((pacc, pcurr) => {
return {
...pacc,
...(percentileFieldNameMapping[pcurr]
? {
[percentileFieldNameMapping[pcurr]]: {
[replaceDotSymbols(curr.key)]: percentiles[pcurr] ?? 0,
},
}
: {}),
};
}, {})
);
},
{ p50: {}, p90: {}, p99: {} }
);
}
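
Both this deleted implementation and its replacement map Elasticsearch keyed percentile results onto the p50/p90/p99 telemetry fields, dropping unmapped percentiles and defaulting null values to 0 (see the 'handles unrecognized percentiles' test above). A self-contained sketch of that transform, with sample values taken from the tests:

const percentileFieldNameMapping: Record<string, string> = {
  '50.0': 'p50',
  '90.0': 'p90',
  '99.0': 'p99',
};

// Keyed values as returned by an ES `percentiles` aggregation
const values: Record<string, number | null> = {
  '50.0': 4.0,
  '75.0': 8.0, // no mapping -> dropped
  '90.0': 26.0,
  '99.0': null, // null -> 0
};

const parsed = Object.keys(values).reduce<Record<string, number>>((acc, key) => {
  const mapped = percentileFieldNameMapping[key];
  return mapped ? { ...acc, [mapped]: values[key] ?? 0 } : acc;
}, {});
// parsed === { p50: 4, p90: 26, p99: 0 }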


@@ -68,6 +68,8 @@ const byReasonSchema: MakeSchemaFrom<AlertingUsage>['count_rules_executions_failures_by_reason
unknown: { type: 'long' },
};
export const NUM_ALERTING_EXECUTION_FAILURE_REASON_TYPES = Object.keys(byReasonSchema).length;
const byPercentileSchema: MakeSchemaFrom<AlertingUsage>['percentile_num_generated_actions_per_day'] =
{
p50: { type: 'long' },

File diff suppressed because it is too large.


@@ -0,0 +1,583 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { flatMap, merge } from 'lodash';
import type {
AggregationsKeyedPercentiles,
AggregationsSingleBucketAggregateBase,
AggregationsPercentilesAggregateBase,
AggregationsSingleMetricAggregateBase,
AggregationsTermsAggregateBase,
AggregationsStringTermsBucketKeys,
AggregationsBuckets,
} from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import { ElasticsearchClient, Logger } from '@kbn/core/server';
import {
NUM_ALERTING_RULE_TYPES,
NUM_ALERTING_EXECUTION_FAILURE_REASON_TYPES,
} from '../alerting_usage_collector';
import { replaceDotSymbols } from './replace_dots_with_underscores';
import { parseSimpleRuleTypeBucket } from './parse_simple_rule_type_bucket';
const Millis2Nanos = 1000 * 1000;
const percentileFieldNameMapping: Record<string, string> = {
'50.0': 'p50',
'90.0': 'p90',
'99.0': 'p99',
};
interface Opts {
esClient: ElasticsearchClient;
eventLogIndex: string;
logger: Logger;
}
interface GetExecutionsPerDayCountResults {
countTotalRuleExecutions: number;
countRuleExecutionsByType: Record<string, number>;
countTotalFailedExecutions: number;
countFailedExecutionsByReason: Record<string, number>;
countFailedExecutionsByReasonByType: Record<string, Record<string, number>>;
avgExecutionTime: number;
avgExecutionTimeByType: Record<string, number>;
avgEsSearchDuration: number;
avgEsSearchDurationByType: Record<string, number>;
avgTotalSearchDuration: number;
avgTotalSearchDurationByType: Record<string, number>;
generatedActionsPercentiles: Record<string, number>;
generatedActionsPercentilesByType: Record<string, Record<string, number>>;
alertsPercentiles: Record<string, number>;
alertsPercentilesByType: Record<string, Record<string, number>>;
}
interface GetExecutionTimeoutsPerDayCountResults {
countExecutionTimeouts: number;
countExecutionTimeoutsByType: Record<string, number>;
}
interface GetExecutionCountsExecutionFailures extends AggregationsSingleBucketAggregateBase {
by_reason: AggregationsTermsAggregateBase<AggregationsStringTermsBucketKeys>;
}
interface GetExecutionCountsAggregationBucket extends AggregationsStringTermsBucketKeys {
avg_execution_time: AggregationsSingleMetricAggregateBase;
avg_es_search_duration: AggregationsSingleMetricAggregateBase;
avg_total_search_duration: AggregationsSingleMetricAggregateBase;
execution_failures: GetExecutionCountsExecutionFailures;
percentile_scheduled_actions: AggregationsPercentilesAggregateBase;
percentile_alerts: AggregationsPercentilesAggregateBase;
}
interface IGetExecutionFailures extends AggregationsSingleBucketAggregateBase {
by_reason: AggregationsTermsAggregateBase<AggregationsStringTermsBucketKeys>;
}
export async function getExecutionsPerDayCount({
esClient,
eventLogIndex,
logger,
}: Opts): Promise<GetExecutionsPerDayCountResults> {
try {
const eventLogAggs = {
avg_execution_time: {
avg: {
field: 'event.duration',
},
},
avg_es_search_duration: {
avg: {
field: 'kibana.alert.rule.execution.metrics.es_search_duration_ms',
},
},
avg_total_search_duration: {
avg: {
field: 'kibana.alert.rule.execution.metrics.total_search_duration_ms',
},
},
percentile_scheduled_actions: {
percentiles: {
field: 'kibana.alert.rule.execution.metrics.number_of_generated_actions',
percents: [50, 90, 99],
},
},
percentile_alerts: {
percentiles: {
field: 'kibana.alert.rule.execution.metrics.alert_counts.active',
percents: [50, 90, 99],
},
},
execution_failures: {
filter: {
term: {
'event.outcome': 'failure',
},
},
aggs: {
by_reason: {
terms: {
field: 'event.reason',
size: NUM_ALERTING_EXECUTION_FAILURE_REASON_TYPES,
},
},
},
},
};
const query = {
index: eventLogIndex,
size: 0,
body: {
query: getProviderAndActionFilterForTimeRange('execute'),
aggs: {
...eventLogAggs,
by_rule_type_id: {
terms: {
field: 'rule.category',
size: NUM_ALERTING_RULE_TYPES,
},
aggs: eventLogAggs,
},
},
},
};
logger.debug(`query for getExecutionsPerDayCount - ${JSON.stringify(query)}`);
const results = await esClient.search(query);
logger.debug(`results for getExecutionsPerDayCount query - ${JSON.stringify(results)}`);
const totalRuleExecutions =
typeof results.hits.total === 'number' ? results.hits.total : results.hits.total?.value;
const aggregations = results.aggregations as {
by_rule_type_id: AggregationsTermsAggregateBase<GetExecutionCountsAggregationBucket>;
execution_failures: IGetExecutionFailures;
percentile_scheduled_actions: AggregationsPercentilesAggregateBase;
percentile_alerts: AggregationsPercentilesAggregateBase;
avg_execution_time: AggregationsSingleMetricAggregateBase;
avg_es_search_duration: AggregationsSingleMetricAggregateBase;
avg_total_search_duration: AggregationsSingleMetricAggregateBase;
};
const aggregationsByRuleTypeId: AggregationsBuckets<GetExecutionCountsAggregationBucket> =
aggregations.by_rule_type_id.buckets as GetExecutionCountsAggregationBucket[];
return {
...parseRuleTypeBucket(aggregationsByRuleTypeId),
...parseExecutionFailureByRuleType(aggregationsByRuleTypeId),
...parseExecutionCountAggregationResults(aggregations),
countTotalRuleExecutions: totalRuleExecutions ?? 0,
};
} catch (err) {
logger.warn(
`Error executing alerting telemetry task: getExecutionsPerDayCount - ${JSON.stringify(err)}`,
{
tags: ['alerting', 'telemetry-failed'],
error: { stack_trace: err.stack },
}
);
return {
countTotalRuleExecutions: 0,
countRuleExecutionsByType: {},
countTotalFailedExecutions: 0,
countFailedExecutionsByReason: {},
countFailedExecutionsByReasonByType: {},
avgExecutionTime: 0,
avgExecutionTimeByType: {},
avgEsSearchDuration: 0,
avgEsSearchDurationByType: {},
avgTotalSearchDuration: 0,
avgTotalSearchDurationByType: {},
generatedActionsPercentiles: {},
generatedActionsPercentilesByType: {},
alertsPercentiles: {},
alertsPercentilesByType: {},
};
}
}
export async function getExecutionTimeoutsPerDayCount({
esClient,
eventLogIndex,
logger,
}: Opts): Promise<GetExecutionTimeoutsPerDayCountResults> {
try {
const query = {
index: eventLogIndex,
size: 0,
body: {
query: getProviderAndActionFilterForTimeRange('execute-timeout'),
aggs: {
by_rule_type_id: {
terms: {
field: 'rule.category',
size: NUM_ALERTING_RULE_TYPES,
},
},
},
},
};
logger.debug(`query for getExecutionTimeoutsPerDayCount - ${JSON.stringify(query)}`);
const results = await esClient.search(query);
logger.debug(`results for getExecutionTimeoutsPerDayCount query - ${JSON.stringify(results)}`);
const aggregations = results.aggregations as {
by_rule_type_id: AggregationsTermsAggregateBase<AggregationsStringTermsBucketKeys>;
};
const totalTimedoutExecutionsCount =
typeof results.hits.total === 'number' ? results.hits.total : results.hits.total?.value;
return {
countExecutionTimeouts: totalTimedoutExecutionsCount ?? 0,
countExecutionTimeoutsByType: parseSimpleRuleTypeBucket(aggregations.by_rule_type_id.buckets),
};
} catch (err) {
logger.warn(
`Error executing alerting telemetry task: getExecutionsTimeoutsPerDayCount - ${JSON.stringify(
err
)}`,
{
tags: ['alerting', 'telemetry-failed'],
error: { stack_trace: err.stack },
}
);
return {
countExecutionTimeouts: 0,
countExecutionTimeoutsByType: {},
};
}
}
/**
* Bucket format:
* {
* key: '.index-threshold', // rule type id
* doc_count: 78, // count of number of executions
* avg_es_search_duration: { // average es search duration across executions
* value: 40.76056338028169,
* },
* percentile_alerts: { // stats for number of alerts created across executions
* values: {
* '50.0': 1,
* '90.0': 1,
* '99.0': 1,
* },
* },
* execution_failures: {
* doc_count: 7, // count of number of failed executions
* by_reason: {
* doc_count_error_upper_bound: 0,
* sum_other_doc_count: 0,
* buckets: [
* {
* key: 'execute', // breakdown of reason for execution failures
* doc_count: 4,
* },
* {
* key: 'decrypt',
* doc_count: 3,
* },
* ],
* },
* },
* percentile_scheduled_actions: { // stats for number of actions generated across executions
* values: {
* '50.0': 0,
* '90.0': 0,
* '99.0': 0,
* },
* },
* avg_execution_time: { // average execution time in nanoseconds across executions
* value: 100576923.07692307,
* },
* avg_total_search_duration: { // average total search duration across executions
* value: 43.74647887323944,
* },
* }
*/
export function parseRuleTypeBucket(
buckets: GetExecutionCountsAggregationBucket[]
): Pick<
GetExecutionsPerDayCountResults,
| 'countRuleExecutionsByType'
| 'avgExecutionTimeByType'
| 'avgEsSearchDurationByType'
| 'avgTotalSearchDurationByType'
| 'generatedActionsPercentilesByType'
| 'alertsPercentilesByType'
> {
let summary = {
countRuleExecutionsByType: {},
avgExecutionTimeByType: {},
avgEsSearchDurationByType: {},
avgTotalSearchDurationByType: {},
generatedActionsPercentilesByType: { p50: {}, p90: {}, p99: {} },
alertsPercentilesByType: { p50: {}, p90: {}, p99: {} },
};
for (const bucket of buckets ?? []) {
const ruleType: string = replaceDotSymbols(bucket?.key) ?? '';
const numExecutions: number = bucket?.doc_count ?? 0;
const avgExecutionTimeNanos = bucket?.avg_execution_time?.value ?? 0;
const avgEsSearchTimeMillis = bucket?.avg_es_search_duration?.value ?? 0;
const avgTotalSearchTimeMillis = bucket?.avg_total_search_duration?.value ?? 0;
const actionPercentiles = bucket?.percentile_scheduled_actions?.values ?? {};
const alertPercentiles = bucket?.percentile_alerts?.values ?? {};
summary = {
countRuleExecutionsByType: {
...summary.countRuleExecutionsByType,
[ruleType]: numExecutions,
},
avgExecutionTimeByType: {
...summary.avgExecutionTimeByType,
[ruleType]: Math.round(avgExecutionTimeNanos / Millis2Nanos),
},
avgEsSearchDurationByType: {
...summary.avgEsSearchDurationByType,
[ruleType]: Math.round(avgEsSearchTimeMillis),
},
avgTotalSearchDurationByType: {
...summary.avgTotalSearchDurationByType,
[ruleType]: Math.round(avgTotalSearchTimeMillis),
},
generatedActionsPercentilesByType: merge(
summary.generatedActionsPercentilesByType,
parsePercentileAggs(actionPercentiles as AggregationsKeyedPercentiles, ruleType)
),
alertsPercentilesByType: merge(
summary.alertsPercentilesByType,
parsePercentileAggs(alertPercentiles as AggregationsKeyedPercentiles, ruleType)
),
};
}
return summary;
}
interface FlattenedExecutionFailureBucket {
ruleType: string;
key: string;
doc_count: number;
}
export function parseExecutionFailureByRuleType(
buckets: GetExecutionCountsAggregationBucket[]
): Pick<GetExecutionsPerDayCountResults, 'countFailedExecutionsByReasonByType'> {
const executionFailuresWithRuleTypeBuckets: FlattenedExecutionFailureBucket[] = flatMap(
buckets ?? [],
(bucket) => {
const ruleType: string = replaceDotSymbols(bucket.key);
/**
* Execution failure bucket format
* [
* {
* key: 'execute',
* doc_count: 4,
* },
* {
* key: 'decrypt',
* doc_count: 3,
* },
* ]
*/
const executionFailuresBuckets = bucket?.execution_failures?.by_reason
?.buckets as AggregationsStringTermsBucketKeys[];
return (executionFailuresBuckets ?? []).map((b) => ({ ...b, ruleType }));
}
);
const parsedFailures = (executionFailuresWithRuleTypeBuckets ?? []).reduce(
(acc: Record<string, Record<string, number>>, bucket: FlattenedExecutionFailureBucket) => {
const ruleType: string = bucket.ruleType;
const reason: string = bucket.key;
if (acc[reason]) {
if (acc[reason][ruleType]) {
return {
...acc,
[reason]: {
...acc[reason],
[ruleType]: acc[reason][ruleType] + bucket.doc_count,
},
};
}
return {
...acc,
[reason]: {
...acc[reason],
[ruleType]: bucket.doc_count,
},
};
}
return {
...acc,
[reason]: {
[ruleType]: bucket.doc_count,
},
};
},
{}
);
return {
countFailedExecutionsByReasonByType: parsedFailures,
};
}
export function parsePercentileAggs(
percentiles: AggregationsKeyedPercentiles,
ruleTypeId?: string
) {
return Object.keys(percentiles ?? {}).reduce((acc, percentileKey: string) => {
let result = {};
const percentileKeyMapped = percentileFieldNameMapping[percentileKey];
if (percentileKeyMapped) {
if (ruleTypeId) {
result = {
[percentileKeyMapped]: {
[ruleTypeId]: percentiles[percentileKey] ?? 0,
},
};
} else {
result = {
[percentileKeyMapped]: percentiles[percentileKey] ?? 0,
};
}
}
return {
...acc,
...result,
};
}, {});
}
/**
* Aggregation Result Format (minus rule type id agg buckets)
* {
* avg_es_search_duration: {
* value: 26.246376811594203,
* },
* percentile_alerts: {
* values: {
* '50.0': 1,
* '90.0': 5,
* '99.0': 5,
* },
* },
* execution_failures: {
* doc_count: 10,
* by_reason: {
* doc_count_error_upper_bound: 0,
* sum_other_doc_count: 0,
* buckets: [
* {
* key: 'decrypt',
* doc_count: 6,
* },
* {
* key: 'execute',
* doc_count: 4,
* },
* ],
* },
* },
* percentile_scheduled_actions: {
* values: {
* '50.0': 0,
* '90.0': 5,
* '99.0': 5,
* },
* },
* avg_execution_time: {
* value: 288250000,
* },
* avg_total_search_duration: {
* value: 28.630434782608695,
* },
*/
export function parseExecutionCountAggregationResults(results: {
execution_failures: IGetExecutionFailures;
percentile_scheduled_actions: AggregationsPercentilesAggregateBase;
percentile_alerts: AggregationsPercentilesAggregateBase;
avg_execution_time: AggregationsSingleMetricAggregateBase;
avg_es_search_duration: AggregationsSingleMetricAggregateBase;
avg_total_search_duration: AggregationsSingleMetricAggregateBase;
}): Pick<
GetExecutionsPerDayCountResults,
| 'countTotalFailedExecutions'
| 'countFailedExecutionsByReason'
| 'avgExecutionTime'
| 'avgEsSearchDuration'
| 'avgTotalSearchDuration'
| 'generatedActionsPercentiles'
| 'alertsPercentiles'
> {
const avgExecutionTimeNanos = results?.avg_execution_time?.value ?? 0;
const avgEsSearchDurationMillis = results?.avg_es_search_duration?.value ?? 0;
const avgTotalSearchDurationMillis = results?.avg_total_search_duration?.value ?? 0;
const executionFailuresByReasonBuckets =
(results?.execution_failures?.by_reason?.buckets as AggregationsStringTermsBucketKeys[]) ?? [];
const actionPercentiles = results?.percentile_scheduled_actions?.values ?? {};
const alertPercentiles = results?.percentile_alerts?.values ?? {};
return {
countTotalFailedExecutions: results?.execution_failures?.doc_count ?? 0,
countFailedExecutionsByReason: executionFailuresByReasonBuckets.reduce(
(acc: Record<string, number>, bucket: AggregationsStringTermsBucketKeys) => {
const reason: string = bucket.key;
return {
...acc,
[reason]: bucket.doc_count ?? 0,
};
},
{}
),
avgExecutionTime: Math.round(avgExecutionTimeNanos / Millis2Nanos),
avgEsSearchDuration: Math.round(avgEsSearchDurationMillis),
avgTotalSearchDuration: Math.round(avgTotalSearchDurationMillis),
generatedActionsPercentiles: parsePercentileAggs(
actionPercentiles as AggregationsKeyedPercentiles
),
alertsPercentiles: parsePercentileAggs(alertPercentiles as AggregationsKeyedPercentiles),
};
}
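// Example (hypothetical values): for the aggregation result shown above, and
// assuming Millis2Nanos === 1_000_000, this returns countTotalFailedExecutions: 10,
// countFailedExecutionsByReason: { decrypt: 6, execute: 4 }, avgExecutionTime: 288 (ms),
// avgEsSearchDuration: 26, avgTotalSearchDuration: 29, plus the parsed percentile fields.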
function getProviderAndActionFilterForTimeRange(
action: string,
provider: string = 'alerting',
range: string = '1d'
) {
return {
bool: {
filter: {
bool: {
must: [
{
term: { 'event.action': action },
},
{
term: { 'event.provider': provider },
},
{
range: {
'@timestamp': {
gte: `now-${range}`,
},
},
},
],
},
},
},
};
}
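// Usage sketch (hypothetical): getProviderAndActionFilterForTimeRange('execute')
// builds a bool filter matching alerting 'execute' events from the last day; it can
// be passed as the `query` of an event log search, with the provider and time range
// overridable via the optional arguments.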


@ -0,0 +1,249 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { elasticsearchServiceMock, loggingSystemMock } from '@kbn/core/server/mocks';
import { getTotalCountAggregations, getTotalCountInUse } from './get_telemetry_from_kibana';
const elasticsearch = elasticsearchServiceMock.createStart();
const esClient = elasticsearch.client.asInternalUser;
const logger: ReturnType<typeof loggingSystemMock.createLogger> = loggingSystemMock.createLogger();
describe('kibana index telemetry', () => {
beforeEach(() => {
jest.resetAllMocks();
});
describe('getTotalCountAggregations', () => {
test('should return rule counts by rule type id, stats about schedule and throttle intervals and number of actions', async () => {
esClient.search.mockResponseOnce({
took: 4,
timed_out: false,
_shards: {
total: 1,
successful: 1,
skipped: 0,
failed: 0,
},
hits: {
total: {
value: 4,
relation: 'eq',
},
max_score: null,
hits: [],
},
aggregations: {
by_rule_type_id: {
doc_count_error_upper_bound: 0,
sum_other_doc_count: 0,
buckets: [
{
key: '.index-threshold',
doc_count: 2,
},
{
key: 'logs.alert.document.count',
doc_count: 1,
},
{
key: 'document.test.',
doc_count: 1,
},
],
},
max_throttle_time: { value: 60 },
min_throttle_time: { value: 0 },
avg_throttle_time: { value: 30 },
max_interval_time: { value: 10 },
min_interval_time: { value: 1 },
avg_interval_time: { value: 4.5 },
max_actions_count: { value: 4 },
min_actions_count: { value: 0 },
avg_actions_count: { value: 2.5 },
},
});
const telemetry = await getTotalCountAggregations({
esClient,
kibanaIndex: 'test',
logger,
});
expect(esClient.search).toHaveBeenCalledTimes(1);
expect(telemetry).toEqual({
connectors_per_alert: {
avg: 2.5,
max: 4,
min: 0,
},
count_by_type: {
'__index-threshold': 2,
document__test__: 1,
// eslint-disable-next-line @typescript-eslint/naming-convention
logs__alert__document__count: 1,
},
count_total: 4,
schedule_time: {
avg: '4.5s',
max: '10s',
min: '1s',
},
schedule_time_number_s: {
avg: 4.5,
max: 10,
min: 1,
},
throttle_time: {
avg: '30s',
max: '60s',
min: '0s',
},
throttle_time_number_s: {
avg: 30,
max: 60,
min: 0,
},
});
});
test('should return empty results and log warning if query throws error', async () => {
esClient.search.mockRejectedValueOnce(new Error('oh no'));
const telemetry = await getTotalCountAggregations({
esClient,
kibanaIndex: 'test',
logger,
});
expect(esClient.search).toHaveBeenCalledTimes(1);
const loggerCall = logger.warn.mock.calls[0][0];
const loggerMeta = logger.warn.mock.calls[0][1];
expect(loggerCall as string).toMatchInlineSnapshot(
`"Error executing alerting telemetry task: getTotalCountAggregations - {}"`
);
expect(loggerMeta?.tags).toEqual(['alerting', 'telemetry-failed']);
expect(loggerMeta?.error?.stack_trace).toBeDefined();
expect(telemetry).toEqual({
connectors_per_alert: {
avg: 0,
max: 0,
min: 0,
},
count_by_type: {},
count_total: 0,
schedule_time: {
avg: '0s',
max: '0s',
min: '0s',
},
schedule_time_number_s: {
avg: 0,
max: 0,
min: 0,
},
throttle_time: {
avg: '0s',
max: '0s',
min: '0s',
},
throttle_time_number_s: {
avg: 0,
max: 0,
min: 0,
},
});
});
});
describe('getTotalCountInUse', () => {
test('should return enabled rule counts by rule type id and number of namespaces', async () => {
esClient.search.mockResponseOnce({
took: 4,
timed_out: false,
_shards: {
total: 1,
successful: 1,
skipped: 0,
failed: 0,
},
hits: {
total: {
value: 4,
relation: 'eq',
},
max_score: null,
hits: [],
},
aggregations: {
namespaces_count: { value: 1 },
by_rule_type_id: {
doc_count_error_upper_bound: 0,
sum_other_doc_count: 0,
buckets: [
{
key: '.index-threshold',
doc_count: 2,
},
{
key: 'logs.alert.document.count',
doc_count: 1,
},
{
key: 'document.test.',
doc_count: 1,
},
],
},
},
});
const telemetry = await getTotalCountInUse({
esClient,
kibanaIndex: 'test',
logger,
});
expect(esClient.search).toHaveBeenCalledTimes(1);
expect(telemetry).toStrictEqual({
countByType: {
'__index-threshold': 2,
document__test__: 1,
// eslint-disable-next-line @typescript-eslint/naming-convention
logs__alert__document__count: 1,
},
countNamespaces: 1,
countTotal: 4,
});
});
test('should return empty results and log warning if query throws error', async () => {
esClient.search.mockRejectedValueOnce(new Error('oh no'));
const telemetry = await getTotalCountInUse({
esClient,
kibanaIndex: 'test',
logger,
});
expect(esClient.search).toHaveBeenCalledTimes(1);
const loggerCall = logger.warn.mock.calls[0][0];
const loggerMeta = logger.warn.mock.calls[0][1];
expect(loggerCall as string).toMatchInlineSnapshot(
`"Error executing alerting telemetry task: getTotalCountInUse - {}"`
);
expect(loggerMeta?.tags).toEqual(['alerting', 'telemetry-failed']);
expect(loggerMeta?.error?.stack_trace).toBeDefined();
expect(telemetry).toStrictEqual({
countByType: {},
countNamespaces: 0,
countTotal: 0,
});
});
});
});


@ -0,0 +1,317 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import type {
AggregationsSingleMetricAggregateBase,
AggregationsCardinalityAggregate,
AggregationsTermsAggregateBase,
AggregationsStringTermsBucketKeys,
} from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import { ElasticsearchClient, Logger } from '@kbn/core/server';
import { AlertingUsage } from '../types';
import { NUM_ALERTING_RULE_TYPES } from '../alerting_usage_collector';
import { parseSimpleRuleTypeBucket } from './parse_simple_rule_type_bucket';
interface Opts {
esClient: ElasticsearchClient;
kibanaIndex: string;
logger: Logger;
}
type GetTotalCountsResults = Pick<
AlertingUsage,
| 'count_total'
| 'count_by_type'
| 'throttle_time'
| 'schedule_time'
| 'throttle_time_number_s'
| 'schedule_time_number_s'
| 'connectors_per_alert'
>;
interface GetTotalCountInUseResults {
countTotal: number;
countByType: Record<string, number>;
countNamespaces: number;
}
export async function getTotalCountAggregations({
esClient,
kibanaIndex,
logger,
}: Opts): Promise<GetTotalCountsResults> {
try {
const query = {
index: kibanaIndex,
size: 0,
body: {
query: {
bool: {
// Aggregate over all rule saved objects
filter: [{ term: { type: 'alert' } }],
},
},
runtime_mappings: {
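// Count the number of actions configured on each rule saved object (0 if none)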
rule_action_count: {
type: 'long',
script: {
source: `
def alert = params._source['alert'];
if (alert != null) {
def actions = alert.actions;
if (actions != null) {
emit(actions.length);
} else {
emit(0);
}
}`,
},
},
// Convert schedule interval duration string from rule saved object to interval in seconds
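// e.g. '10s' emits 10, '1m' emits 60, '2h' emits 7200; unparseable values emit 0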
rule_schedule_interval: {
type: 'long',
script: {
source: `
int parsed = 0;
if (doc['alert.schedule.interval'].size() > 0) {
def interval = doc['alert.schedule.interval'].value;
if (interval.length() > 1) {
// get last char
String timeChar = interval.substring(interval.length() - 1);
// remove last char
interval = interval.substring(0, interval.length() - 1);
if (interval.chars().allMatch(Character::isDigit)) {
// regex is not allowed in the Painless language, so parse the digits manually
parsed = Integer.parseInt(interval);
if (timeChar.equals("m")) {
parsed = parsed * 60;
} else if (timeChar.equals("h")) {
parsed = parsed * 60 * 60;
} else if (timeChar.equals("d")) {
parsed = parsed * 24 * 60 * 60;
}
// "s" requires no conversion
}
}
}
// emit exactly once: the interval in seconds, or 0 if the value could not be parsed
emit(parsed);
`,
},
},
// Convert throttle interval duration string from rule saved object to interval in seconds
rule_throttle_interval: {
type: 'long',
script: {
source: `
int parsed = 0;
if (doc['alert.throttle'].size() > 0) {
def throttle = doc['alert.throttle'].value;
if (throttle.length() > 1) {
// get last char
String timeChar = throttle.substring(throttle.length() - 1);
// remove last char
throttle = throttle.substring(0, throttle.length() - 1);
if (throttle.chars().allMatch(Character::isDigit)) {
// regex is not allowed in the Painless language, so parse the digits manually
parsed = Integer.parseInt(throttle);
if (timeChar.equals("m")) {
parsed = parsed * 60;
} else if (timeChar.equals("h")) {
parsed = parsed * 60 * 60;
} else if (timeChar.equals("d")) {
parsed = parsed * 24 * 60 * 60;
}
// "s" requires no conversion
}
}
}
// emit exactly once: the throttle in seconds, or 0 if the value could not be parsed
emit(parsed);
`,
},
},
},
aggs: {
by_rule_type_id: {
terms: {
field: 'alert.alertTypeId',
size: NUM_ALERTING_RULE_TYPES,
},
},
max_throttle_time: { max: { field: 'rule_throttle_interval' } },
min_throttle_time: { min: { field: 'rule_throttle_interval' } },
avg_throttle_time: { avg: { field: 'rule_throttle_interval' } },
max_interval_time: { max: { field: 'rule_schedule_interval' } },
min_interval_time: { min: { field: 'rule_schedule_interval' } },
avg_interval_time: { avg: { field: 'rule_schedule_interval' } },
max_actions_count: { max: { field: 'rule_action_count' } },
min_actions_count: { min: { field: 'rule_action_count' } },
avg_actions_count: { avg: { field: 'rule_action_count' } },
},
},
};
logger.debug(`query for getTotalCountAggregations - ${JSON.stringify(query)}`);
const results = await esClient.search(query);
logger.debug(`results for getTotalCountAggregations query - ${JSON.stringify(results)}`);
const aggregations = results.aggregations as {
by_rule_type_id: AggregationsTermsAggregateBase<AggregationsStringTermsBucketKeys>;
max_throttle_time: AggregationsSingleMetricAggregateBase;
min_throttle_time: AggregationsSingleMetricAggregateBase;
avg_throttle_time: AggregationsSingleMetricAggregateBase;
max_interval_time: AggregationsSingleMetricAggregateBase;
min_interval_time: AggregationsSingleMetricAggregateBase;
avg_interval_time: AggregationsSingleMetricAggregateBase;
max_actions_count: AggregationsSingleMetricAggregateBase;
min_actions_count: AggregationsSingleMetricAggregateBase;
avg_actions_count: AggregationsSingleMetricAggregateBase;
};
const totalRulesCount =
typeof results.hits.total === 'number' ? results.hits.total : results.hits.total?.value;
return {
count_total: totalRulesCount ?? 0,
count_by_type: parseSimpleRuleTypeBucket(aggregations.by_rule_type_id.buckets),
throttle_time: {
min: `${aggregations.min_throttle_time.value ?? 0}s`,
avg: `${aggregations.avg_throttle_time.value ?? 0}s`,
max: `${aggregations.max_throttle_time.value ?? 0}s`,
},
schedule_time: {
min: `${aggregations.min_interval_time.value ?? 0}s`,
avg: `${aggregations.avg_interval_time.value ?? 0}s`,
max: `${aggregations.max_interval_time.value ?? 0}s`,
},
throttle_time_number_s: {
min: aggregations.min_throttle_time.value ?? 0,
avg: aggregations.avg_throttle_time.value ?? 0,
max: aggregations.max_throttle_time.value ?? 0,
},
schedule_time_number_s: {
min: aggregations.min_interval_time.value ?? 0,
avg: aggregations.avg_interval_time.value ?? 0,
max: aggregations.max_interval_time.value ?? 0,
},
connectors_per_alert: {
min: aggregations.min_actions_count.value ?? 0,
avg: aggregations.avg_actions_count.value ?? 0,
max: aggregations.max_actions_count.value ?? 0,
},
};
} catch (err) {
logger.warn(
`Error executing alerting telemetry task: getTotalCountAggregations - ${JSON.stringify(err)}`,
{
tags: ['alerting', 'telemetry-failed'],
error: { stack_trace: err.stack },
}
);
return {
count_total: 0,
count_by_type: {},
throttle_time: {
min: '0s',
avg: '0s',
max: '0s',
},
schedule_time: {
min: '0s',
avg: '0s',
max: '0s',
},
throttle_time_number_s: {
min: 0,
avg: 0,
max: 0,
},
schedule_time_number_s: {
min: 0,
avg: 0,
max: 0,
},
connectors_per_alert: {
min: 0,
avg: 0,
max: 0,
},
};
}
}
export async function getTotalCountInUse({
esClient,
kibanaIndex,
logger,
}: Opts): Promise<GetTotalCountInUseResults> {
try {
const query = {
index: kibanaIndex,
size: 0,
body: {
query: {
bool: {
// Aggregate over only enabled rule saved objects
filter: [{ term: { type: 'alert' } }, { term: { 'alert.enabled': true } }],
},
},
aggs: {
namespaces_count: { cardinality: { field: 'namespaces' } },
by_rule_type_id: {
terms: {
field: 'alert.alertTypeId',
size: NUM_ALERTING_RULE_TYPES,
},
},
},
},
};
logger.debug(`query for getTotalCountInUse - ${JSON.stringify(query)}`);
const results = await esClient.search(query);
logger.debug(`results for getTotalCountInUse query - ${JSON.stringify(results)}`);
const aggregations = results.aggregations as {
by_rule_type_id: AggregationsTermsAggregateBase<AggregationsStringTermsBucketKeys>;
namespaces_count: AggregationsCardinalityAggregate;
};
const totalEnabledRulesCount =
typeof results.hits.total === 'number' ? results.hits.total : results.hits.total?.value;
return {
countTotal: totalEnabledRulesCount ?? 0,
countByType: parseSimpleRuleTypeBucket(aggregations.by_rule_type_id.buckets),
countNamespaces: aggregations.namespaces_count.value ?? 0,
};
} catch (err) {
logger.warn(
`Error executing alerting telemetry task: getTotalCountInUse - ${JSON.stringify(err)}`,
{
tags: ['alerting', 'telemetry-failed'],
error: { stack_trace: err.stack },
}
);
return {
countTotal: 0,
countByType: {},
countNamespaces: 0,
};
}
}


@ -0,0 +1,256 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { elasticsearchServiceMock, loggingSystemMock } from '@kbn/core/server/mocks';
import {
getFailedAndUnrecognizedTasksPerDay,
parseBucket,
} from './get_telemetry_from_task_manager';
const elasticsearch = elasticsearchServiceMock.createStart();
const esClient = elasticsearch.client.asInternalUser;
const logger: ReturnType<typeof loggingSystemMock.createLogger> = loggingSystemMock.createLogger();
describe('task manager telemetry', () => {
beforeEach(() => {
jest.resetAllMocks();
});
describe('parseBucket', () => {
test('should correctly parse aggregation bucket results', () => {
expect(
parseBucket([
{
key: 'failed',
doc_count: 36,
by_task_type: {
doc_count_error_upper_bound: 0,
sum_other_doc_count: 0,
buckets: [
{
key: 'alerting:.index-threshold',
doc_count: 4,
},
{
key: 'alerting:document.test.',
doc_count: 32,
},
],
},
},
{
key: 'unrecognized',
doc_count: 4,
by_task_type: {
doc_count_error_upper_bound: 0,
sum_other_doc_count: 0,
buckets: [
{
key: 'alerting:logs.alert.document.count',
doc_count: 4,
},
],
},
},
])
).toEqual({
countFailedAndUnrecognizedTasksByStatus: {
failed: 36,
unrecognized: 4,
},
countFailedAndUnrecognizedTasksByStatusByType: {
failed: {
'__index-threshold': 4,
document__test__: 32,
},
unrecognized: {
// eslint-disable-next-line @typescript-eslint/naming-convention
logs__alert__document__count: 4,
},
},
});
});
test('should handle missing values', () => {
expect(
parseBucket([
{
key: 'failed',
by_task_type: {
doc_count_error_upper_bound: 0,
sum_other_doc_count: 0,
buckets: [
{
key: 'alerting:.index-threshold',
doc_count: 4,
},
// @ts-expect-error
{
key: 'alerting:document.test.',
},
],
},
},
{
key: 'unrecognized',
doc_count: 4,
// @ts-expect-error
by_task_type: {
doc_count_error_upper_bound: 0,
sum_other_doc_count: 0,
},
},
// @ts-expect-error
{
key: 'another_key',
},
])
).toEqual({
countFailedAndUnrecognizedTasksByStatus: {
failed: 0,
unrecognized: 4,
another_key: 0,
},
countFailedAndUnrecognizedTasksByStatusByType: {
failed: {
'__index-threshold': 4,
document__test__: 0,
},
},
});
});
test('should handle empty input', () => {
expect(parseBucket([])).toEqual({
countFailedAndUnrecognizedTasksByStatus: {},
countFailedAndUnrecognizedTasksByStatusByType: {},
});
});
test('should handle undefined input', () => {
// @ts-expect-error
expect(parseBucket(undefined)).toEqual({
countFailedAndUnrecognizedTasksByStatus: {},
countFailedAndUnrecognizedTasksByStatusByType: {},
});
});
});
describe('getFailedAndUnrecognizedTasksPerDay', () => {
test('should return counts of failed and unrecognized tasks broken down by status and rule type', async () => {
esClient.search.mockResponse({
took: 4,
timed_out: false,
_shards: {
total: 1,
successful: 1,
skipped: 0,
failed: 0,
},
hits: {
total: {
value: 40,
relation: 'eq',
},
max_score: null,
hits: [],
},
aggregations: {
by_status: {
doc_count_error_upper_bound: 0,
sum_other_doc_count: 0,
buckets: [
{
key: 'failed',
doc_count: 36,
by_task_type: {
doc_count_error_upper_bound: 0,
sum_other_doc_count: 0,
buckets: [
{
key: 'alerting:.index-threshold',
doc_count: 4,
},
{
key: 'alerting:document.test.',
doc_count: 32,
},
],
},
},
{
key: 'unrecognized',
doc_count: 4,
by_task_type: {
doc_count_error_upper_bound: 0,
sum_other_doc_count: 0,
buckets: [
{
key: 'alerting:logs.alert.document.count',
doc_count: 4,
},
],
},
},
],
},
},
});
const telemetry = await getFailedAndUnrecognizedTasksPerDay({
esClient,
taskManagerIndex: 'test',
logger,
});
expect(esClient.search).toHaveBeenCalledTimes(1);
expect(telemetry).toStrictEqual({
countFailedAndUnrecognizedTasks: 40,
countFailedAndUnrecognizedTasksByStatus: {
failed: 36,
unrecognized: 4,
},
countFailedAndUnrecognizedTasksByStatusByType: {
failed: {
'__index-threshold': 4,
document__test__: 32,
},
unrecognized: {
// eslint-disable-next-line @typescript-eslint/naming-convention
logs__alert__document__count: 4,
},
},
});
});
test('should return empty results and log warning if query throws error', async () => {
esClient.search.mockRejectedValue(new Error('oh no'));
const telemetry = await getFailedAndUnrecognizedTasksPerDay({
esClient,
taskManagerIndex: 'test',
logger,
});
expect(esClient.search).toHaveBeenCalledTimes(1);
const loggerCall = logger.warn.mock.calls[0][0];
const loggerMeta = logger.warn.mock.calls[0][1];
expect(loggerCall as string).toMatchInlineSnapshot(
`"Error executing alerting telemetry task: getFailedAndUnrecognizedTasksPerDay - {}"`
);
expect(loggerMeta?.tags).toEqual(['alerting', 'telemetry-failed']);
expect(loggerMeta?.error?.stack_trace).toBeDefined();
expect(telemetry).toStrictEqual({
countFailedAndUnrecognizedTasks: 0,
countFailedAndUnrecognizedTasksByStatus: {},
countFailedAndUnrecognizedTasksByStatusByType: {},
});
});
});
});


@ -0,0 +1,199 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { isEmpty, merge } from 'lodash';
import type {
AggregationsTermsAggregateBase,
AggregationsStringTermsBucketKeys,
AggregationsBuckets,
} from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import { ElasticsearchClient, Logger } from '@kbn/core/server';
import { replaceDotSymbols } from './replace_dots_with_underscores';
import { NUM_ALERTING_RULE_TYPES } from '../alerting_usage_collector';
interface Opts {
esClient: ElasticsearchClient;
taskManagerIndex: string;
logger: Logger;
}
interface GetFailedAndUnrecognizedTasksAggregationBucket extends AggregationsStringTermsBucketKeys {
by_task_type: AggregationsTermsAggregateBase<AggregationsStringTermsBucketKeys>;
}
interface GetFailedAndUnrecognizedTasksResults {
countFailedAndUnrecognizedTasks: number;
countFailedAndUnrecognizedTasksByStatus: Record<string, number>;
countFailedAndUnrecognizedTasksByStatusByType: Record<string, Record<string, number>>;
}
export async function getFailedAndUnrecognizedTasksPerDay({
esClient,
taskManagerIndex,
logger,
}: Opts): Promise<GetFailedAndUnrecognizedTasksResults> {
try {
const query = {
index: taskManagerIndex,
size: 0,
body: {
query: {
bool: {
must: [
{
bool: {
should: [
{
term: {
'task.status': 'unrecognized',
},
},
{
term: {
'task.status': 'failed',
},
},
],
},
},
{
wildcard: {
'task.taskType': {
value: 'alerting:*',
},
},
},
{
range: {
'task.runAt': {
gte: 'now-1d',
},
},
},
],
},
},
aggs: {
by_status: {
terms: {
field: 'task.status',
size: 10,
},
aggs: {
by_task_type: {
terms: {
field: 'task.taskType',
// Use number of alerting rule types because we're filtering by 'alerting:'
size: NUM_ALERTING_RULE_TYPES,
},
},
},
},
},
},
};
logger.debug(`query for getFailedAndUnrecognizedTasksPerDay - ${JSON.stringify(query)}`);
const results = await esClient.search(query);
logger.debug(
`results for getFailedAndUnrecognizedTasksPerDay query - ${JSON.stringify(results)}`
);
const aggregations = results.aggregations as {
by_status: AggregationsTermsAggregateBase<GetFailedAndUnrecognizedTasksAggregationBucket>;
};
const totalFailedAndUnrecognizedTasks =
typeof results.hits.total === 'number' ? results.hits.total : results.hits.total?.value;
const aggregationsByStatus: AggregationsBuckets<GetFailedAndUnrecognizedTasksAggregationBucket> =
aggregations.by_status.buckets as GetFailedAndUnrecognizedTasksAggregationBucket[];
return {
...parseBucket(aggregationsByStatus),
countFailedAndUnrecognizedTasks: totalFailedAndUnrecognizedTasks ?? 0,
};
} catch (err) {
logger.warn(
`Error executing alerting telemetry task: getFailedAndUnrecognizedTasksPerDay - ${JSON.stringify(
err
)}`,
{
tags: ['alerting', 'telemetry-failed'],
error: { stack_trace: err.stack },
}
);
return {
countFailedAndUnrecognizedTasks: 0,
countFailedAndUnrecognizedTasksByStatus: {},
countFailedAndUnrecognizedTasksByStatusByType: {},
};
}
}
/**
* Bucket format:
* {
* "key": "idle", // task status
* "doc_count": 28, // number of tasks with this status
* "by_task_type": {
* "doc_count_error_upper_bound": 0,
* "sum_other_doc_count": 0,
* "buckets": [
* {
* "key": "alerting:.es-query", // breakdown of task type for status
* "doc_count": 1
* },
* {
* "key": "alerting:.index-threshold",
* "doc_count": 1
* }
* ]
* }
* }
*/
export function parseBucket(
buckets: GetFailedAndUnrecognizedTasksAggregationBucket[]
): Pick<
GetFailedAndUnrecognizedTasksResults,
'countFailedAndUnrecognizedTasksByStatus' | 'countFailedAndUnrecognizedTasksByStatusByType'
> {
return (buckets ?? []).reduce(
(summary, bucket) => {
const status: string = bucket.key;
const taskTypeBuckets = bucket?.by_task_type?.buckets as AggregationsStringTermsBucketKeys[];
const byTaskType = (taskTypeBuckets ?? []).reduce(
(acc: Record<string, number>, taskTypeBucket: AggregationsStringTermsBucketKeys) => {
const taskType: string = replaceDotSymbols(taskTypeBucket.key.replace('alerting:', ''));
return {
...acc,
[taskType]: taskTypeBucket.doc_count ?? 0,
};
},
{}
);
return {
...summary,
countFailedAndUnrecognizedTasksByStatus: {
...summary.countFailedAndUnrecognizedTasksByStatus,
[status]: bucket?.doc_count ?? 0,
},
countFailedAndUnrecognizedTasksByStatusByType: merge(
summary.countFailedAndUnrecognizedTasksByStatusByType,
isEmpty(byTaskType) ? {} : { [status]: byTaskType }
),
};
},
{
countFailedAndUnrecognizedTasksByStatus: {},
countFailedAndUnrecognizedTasksByStatusByType: {},
}
);
}
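/**
 * Example (values from the tests above): a 'failed' bucket with doc_count 36 and
 * by_task_type buckets [{ key: 'alerting:.index-threshold', doc_count: 4 }] parses to
 * countFailedAndUnrecognizedTasksByStatus: { failed: 36 } and
 * countFailedAndUnrecognizedTasksByStatusByType: { failed: { '__index-threshold': 4 } }.
 */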


@ -0,0 +1,67 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { parseSimpleRuleTypeBucket } from './parse_simple_rule_type_bucket';
describe('parseSimpleRuleTypeBucket', () => {
test('should correctly parse rule type bucket results', () => {
expect(
parseSimpleRuleTypeBucket([
{
key: '.index-threshold',
doc_count: 78,
},
{
key: 'document.test.',
doc_count: 42,
},
{
key: 'logs.alert.document.count',
doc_count: 28,
},
])
).toEqual({
'__index-threshold': 78,
document__test__: 42,
// eslint-disable-next-line @typescript-eslint/naming-convention
logs__alert__document__count: 28,
});
});
test('should handle missing values', () => {
expect(
parseSimpleRuleTypeBucket([
// @ts-expect-error
{
key: '.index-threshold',
},
{
key: 'document.test.',
doc_count: 42,
},
{
key: 'logs.alert.document.count',
doc_count: 28,
},
])
).toEqual({
'__index-threshold': 0,
document__test__: 42,
// eslint-disable-next-line @typescript-eslint/naming-convention
logs__alert__document__count: 28,
});
});
test('should handle empty input', () => {
expect(parseSimpleRuleTypeBucket([])).toEqual({});
});
test('should handle undefined input', () => {
// @ts-expect-error
expect(parseSimpleRuleTypeBucket(undefined)).toEqual({});
});
});


@ -0,0 +1,25 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import {
AggregationsBuckets,
AggregationsStringTermsBucketKeys,
} from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import { replaceDotSymbols } from './replace_dots_with_underscores';
export function parseSimpleRuleTypeBucket(
ruleTypeBuckets: AggregationsBuckets<AggregationsStringTermsBucketKeys>
) {
const buckets = ruleTypeBuckets as AggregationsStringTermsBucketKeys[];
return (buckets ?? []).reduce((acc, bucket: AggregationsStringTermsBucketKeys) => {
const ruleType: string = replaceDotSymbols(bucket.key);
return {
...acc,
[ruleType]: bucket.doc_count ?? 0,
};
}, {});
}
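// e.g. [{ key: '.index-threshold', doc_count: 78 }, { key: 'logs.alert.document.count', doc_count: 28 }]
// parses to { '__index-threshold': 78, logs__alert__document__count: 28 }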


@ -0,0 +1,14 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { replaceDotSymbols } from './replace_dots_with_underscores';
describe('replaceDotSymbols', () => {
test('should replace "." symbols with "__" in string', async () => {
expect(replaceDotSymbols('.index-threshold')).toEqual('__index-threshold');
});
});


@ -0,0 +1,10 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
export function replaceDotSymbols(strToReplace: string) {
return strToReplace.replaceAll('.', '__');
}
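// e.g. replaceDotSymbols('logs.alert.document.count') === 'logs__alert__document__count'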


@ -13,13 +13,12 @@ import {
TaskManagerStartContract,
} from '@kbn/task-manager-plugin/server';
+import { getFailedAndUnrecognizedTasksPerDay } from './lib/get_telemetry_from_task_manager';
+import { getTotalCountAggregations, getTotalCountInUse } from './lib/get_telemetry_from_kibana';
import {
-  getTotalCountAggregations,
-  getTotalCountInUse,
  getExecutionsPerDayCount,
  getExecutionTimeoutsPerDayCount,
-  getFailedAndUnrecognizedTasksPerDay,
-} from './alerting_telemetry';
+} from './lib/get_telemetry_from_event_log';
export const TELEMETRY_TASK_TYPE = 'alerting_telemetry';
@ -98,11 +97,11 @@ export function telemetryTaskRunner(
async run() {
const esClient = await getEsClient();
return Promise.all([
-getTotalCountAggregations(esClient, kibanaIndex, logger),
-getTotalCountInUse(esClient, kibanaIndex, logger),
-getExecutionsPerDayCount(esClient, eventLogIndex, logger),
-getExecutionTimeoutsPerDayCount(esClient, eventLogIndex, logger),
-getFailedAndUnrecognizedTasksPerDay(esClient, taskManagerIndex, logger),
+getTotalCountAggregations({ esClient, kibanaIndex, logger }),
+getTotalCountInUse({ esClient, kibanaIndex, logger }),
+getExecutionsPerDayCount({ esClient, eventLogIndex, logger }),
+getExecutionTimeoutsPerDayCount({ esClient, eventLogIndex, logger }),
+getFailedAndUnrecognizedTasksPerDay({ esClient, taskManagerIndex, logger }),
])
.then(
([
@ -120,22 +119,25 @@ export function telemetryTaskRunner(
count_active_total: totalInUse.countTotal,
count_disabled_total: totalCountAggregations.count_total - totalInUse.countTotal,
count_rules_namespaces: totalInUse.countNamespaces,
-count_rules_executions_per_day: dailyExecutionCounts.countTotal,
-count_rules_executions_by_type_per_day: dailyExecutionCounts.countByType,
-count_rules_executions_failured_per_day: dailyExecutionCounts.countTotalFailures,
+count_rules_executions_per_day: dailyExecutionCounts.countTotalRuleExecutions,
+count_rules_executions_by_type_per_day:
+  dailyExecutionCounts.countRuleExecutionsByType,
+count_rules_executions_failured_per_day:
+  dailyExecutionCounts.countTotalFailedExecutions,
count_rules_executions_failured_by_reason_per_day:
-  dailyExecutionCounts.countFailuresByReason,
+  dailyExecutionCounts.countFailedExecutionsByReason,
count_rules_executions_failured_by_reason_by_type_per_day:
-  dailyExecutionCounts.countFailuresByReasonByType,
-count_rules_executions_timeouts_per_day: dailyExecutionTimeoutCounts.countTotal,
+  dailyExecutionCounts.countFailedExecutionsByReasonByType,
+count_rules_executions_timeouts_per_day:
+  dailyExecutionTimeoutCounts.countExecutionTimeouts,
count_rules_executions_timeouts_by_type_per_day:
-  dailyExecutionTimeoutCounts.countByType,
+  dailyExecutionTimeoutCounts.countExecutionTimeoutsByType,
count_failed_and_unrecognized_rule_tasks_per_day:
-  dailyFailedAndUnrecognizedTasks.countTotal,
+  dailyFailedAndUnrecognizedTasks.countFailedAndUnrecognizedTasks,
count_failed_and_unrecognized_rule_tasks_by_status_per_day:
-  dailyFailedAndUnrecognizedTasks.countByStatus,
+  dailyFailedAndUnrecognizedTasks.countFailedAndUnrecognizedTasksByStatus,
count_failed_and_unrecognized_rule_tasks_by_status_by_type_per_day:
-  dailyFailedAndUnrecognizedTasks.countByStatusByRuleType,
+  dailyFailedAndUnrecognizedTasks.countFailedAndUnrecognizedTasksByStatusByType,
avg_execution_time_per_day: dailyExecutionCounts.avgExecutionTime,
avg_execution_time_by_type_per_day: dailyExecutionCounts.avgExecutionTimeByType,
avg_es_search_duration_per_day: dailyExecutionCounts.avgEsSearchDuration,