[Alerting] Telemetry for long-running/cancelled rules (#123291)

* Renaming alerting telemetry files

* Adding daily counts for execution timeouts

* Threading in usageCounter

* Adding usage counter for alerts after cancellation

* Updating telemetry mappings

* Adding tests

* Adding tests

* Cleanup

* Cleanup

* Adding rule type id to counter name

* Adding new siem rule types

* Replacing all dots with underscores

Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com>
Ying Mao 2022-01-26 10:51:32 -05:00 committed by GitHub
parent 740ce6c7aa
commit 83fee75692
14 changed files with 601 additions and 121 deletions
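
At its core, the change threads a UsageCounter from plugin setup into the rule task runner and bumps a per-rule-type counter whenever action scheduling is skipped because a rule finished only after its execution timeout. Below is a minimal, self-contained sketch of that counter call; the UsageCounter interface here is an assumed simplification of the real one exported from src/plugins/usage_collection/server, and the helper name is hypothetical.

// Assumed, simplified shape of the usage counter; the real interface lives in
// src/plugins/usage_collection/server.
interface UsageCounter {
  incrementCounter(params: { counterName: string; incrementBy: number }): void;
}

// Hypothetical helper mirroring the task runner logic added in this commit: when a cancelled
// rule completes anyway and its actions are skipped, record one hit keyed by rule type id.
function countSkippedActionsAfterCancellation(usageCounter: UsageCounter, ruleTypeId: string) {
  usageCounter.incrementCounter({
    counterName: `alertsSkippedDueToRuleExecutionTimeout_${ruleTypeId}`,
    incrementBy: 1,
  });
}

Daily execution-timeout counts are collected separately from the event log (getExecutionTimeoutsPerDayCount) and reported through the new count_rules_executions_timeouts_per_day and count_rules_executions_timeouts_by_type_per_day telemetry fields.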


@ -8,7 +8,7 @@
import type { PublicMethodsOf } from '@kbn/utility-types';
import { first } from 'rxjs/operators';
import { BehaviorSubject } from 'rxjs';
import { UsageCollectionSetup } from 'src/plugins/usage_collection/server';
import { UsageCollectionSetup, UsageCounter } from 'src/plugins/usage_collection/server';
import { SecurityPluginSetup, SecurityPluginStart } from '../../security/server';
import {
EncryptedSavedObjectsPluginSetup,
@ -51,7 +51,7 @@ import {
AlertTypeState,
Services,
} from './types';
import { registerAlertsUsageCollector } from './usage';
import { registerAlertingUsageCollector } from './usage';
import { initializeAlertingTelemetry, scheduleAlertingTelemetry } from './usage/task';
import { IEventLogger, IEventLogService, IEventLogClientService } from '../../event_log/server';
import { PluginStartContract as FeaturesPluginStart } from '../../features/server';
@ -153,6 +153,7 @@ export class AlertingPlugin {
private eventLogService?: IEventLogService;
private eventLogger?: IEventLogger;
private kibanaBaseUrl: string | undefined;
private usageCounter: UsageCounter | undefined;
constructor(initializerContext: PluginInitializerContext) {
this.config = initializerContext.config.create<AlertsConfig>().pipe(first()).toPromise();
@ -208,7 +209,7 @@ export class AlertingPlugin {
const usageCollection = plugins.usageCollection;
if (usageCollection) {
registerAlertsUsageCollector(
registerAlertingUsageCollector(
usageCollection,
core.getStartServices().then(([_, { taskManager }]) => taskManager)
);
@ -223,7 +224,7 @@ export class AlertingPlugin {
}
// Usage counter for telemetry
const usageCounter = plugins.usageCollection?.createUsageCounter(ALERTS_FEATURE_ID);
this.usageCounter = plugins.usageCollection?.createUsageCounter(ALERTS_FEATURE_ID);
setupSavedObjects(
core.savedObjects,
@ -259,7 +260,7 @@ export class AlertingPlugin {
defineRoutes({
router,
licenseState: this.licenseState,
usageCounter,
usageCounter: this.usageCounter,
encryptedSavedObjects: plugins.encryptedSavedObjects,
});
@ -393,6 +394,7 @@ export class AlertingPlugin {
supportsEphemeralTasks: plugins.taskManager.supportsEphemeralTasks(),
maxEphemeralActionsPerRule: config.maxEphemeralActionsPerAlert,
cancelAlertsOnRuleTimeout: config.cancelAlertsOnRuleTimeout,
usageCounter: this.usageCounter,
});
});


@ -7,6 +7,7 @@
import sinon from 'sinon';
import { schema } from '@kbn/config-schema';
import { usageCountersServiceMock } from 'src/plugins/usage_collection/server/usage_counters/usage_counters_service.mock';
import {
AlertExecutorOptions,
AlertTypeParams,
@ -59,6 +60,9 @@ const ruleType: jest.Mocked<UntypedNormalizedRuleType> = {
let fakeTimer: sinon.SinonFakeTimers;
const mockUsageCountersSetup = usageCountersServiceMock.createSetupContract();
const mockUsageCounter = mockUsageCountersSetup.createUsageCounter('test');
describe('Task Runner', () => {
let mockedTaskInstance: ConcreteTaskInstance;
@ -113,6 +117,7 @@ describe('Task Runner', () => {
supportsEphemeralTasks: false,
maxEphemeralActionsPerRule: 10,
cancelAlertsOnRuleTimeout: true,
usageCounter: mockUsageCounter,
};
function testAgainstEphemeralSupport(
@ -397,6 +402,7 @@ describe('Task Runner', () => {
},
expect.any(Function)
);
expect(mockUsageCounter.incrementCounter).not.toHaveBeenCalled();
});
testAgainstEphemeralSupport(
@ -683,6 +689,7 @@ describe('Task Runner', () => {
ruleset: 'alerts',
},
});
expect(mockUsageCounter.incrementCounter).not.toHaveBeenCalled();
}
);
@ -899,6 +906,7 @@ describe('Task Runner', () => {
ruleset: 'alerts',
},
});
expect(mockUsageCounter.incrementCounter).not.toHaveBeenCalled();
});
testAgainstEphemeralSupport(
@ -965,6 +973,7 @@ describe('Task Runner', () => {
4,
'ruleExecutionStatus for test:1: {"lastExecutionDate":"1970-01-01T00:00:00.000Z","status":"active"}'
);
expect(mockUsageCounter.incrementCounter).not.toHaveBeenCalled();
}
);
@ -1157,6 +1166,7 @@ describe('Task Runner', () => {
],
]
`);
expect(mockUsageCounter.incrementCounter).not.toHaveBeenCalled();
});
testAgainstEphemeralSupport(
@ -1218,6 +1228,7 @@ describe('Task Runner', () => {
});
await taskRunner.run();
expect(enqueueFunction).toHaveBeenCalledTimes(1);
expect(mockUsageCounter.incrementCounter).not.toHaveBeenCalled();
}
);
@ -1287,6 +1298,7 @@ describe('Task Runner', () => {
});
await taskRunner.run();
expect(enqueueFunction).toHaveBeenCalledTimes(1);
expect(mockUsageCounter.incrementCounter).not.toHaveBeenCalled();
}
);
@ -1607,6 +1619,7 @@ describe('Task Runner', () => {
],
]
`);
expect(mockUsageCounter.incrementCounter).not.toHaveBeenCalled();
}
);
@ -2013,6 +2026,7 @@ describe('Task Runner', () => {
},
]
`);
expect(mockUsageCounter.incrementCounter).not.toHaveBeenCalled();
}
);
@ -2112,6 +2126,7 @@ describe('Task Runner', () => {
expect(enqueueFunction).toHaveBeenCalledTimes(2);
expect((enqueueFunction as jest.Mock).mock.calls[1][0].id).toEqual('1');
expect((enqueueFunction as jest.Mock).mock.calls[0][0].id).toEqual('2');
expect(mockUsageCounter.incrementCounter).not.toHaveBeenCalled();
}
);
@ -2246,6 +2261,7 @@ describe('Task Runner', () => {
},
]
`);
expect(mockUsageCounter.incrementCounter).not.toHaveBeenCalled();
}
);
@ -2501,6 +2517,7 @@ describe('Task Runner', () => {
],
]
`);
expect(mockUsageCounter.incrementCounter).not.toHaveBeenCalled();
});
test('validates params before executing the alert type', async () => {
@ -2557,6 +2574,7 @@ describe('Task Runner', () => {
expect(taskRunnerFactoryInitializerParams.logger.error).toHaveBeenCalledWith(
`Executing Rule foo:test:1 has resulted in Error: params invalid: [param1]: expected value of type [string] but got [undefined]`
);
expect(mockUsageCounter.incrementCounter).not.toHaveBeenCalled();
});
test('uses API key when provided', async () => {
@ -2591,6 +2609,7 @@ describe('Task Runner', () => {
request,
'/'
);
expect(mockUsageCounter.incrementCounter).not.toHaveBeenCalled();
});
test(`doesn't use API key when not provided`, async () => {
@ -2623,6 +2642,7 @@ describe('Task Runner', () => {
request,
'/'
);
expect(mockUsageCounter.incrementCounter).not.toHaveBeenCalled();
});
test('rescheduled the Alert if the schedule has update during a task run', async () => {
@ -2673,6 +2693,7 @@ describe('Task Runner', () => {
},
}
`);
expect(mockUsageCounter.incrementCounter).not.toHaveBeenCalled();
});
test('recovers gracefully when the RuleType executor throws an exception', async () => {
@ -2826,6 +2847,7 @@ describe('Task Runner', () => {
],
]
`);
expect(mockUsageCounter.incrementCounter).not.toHaveBeenCalled();
});
test('recovers gracefully when the Alert Task Runner throws an exception when fetching the encrypted attributes', async () => {
@ -2960,6 +2982,7 @@ describe('Task Runner', () => {
],
]
`);
expect(mockUsageCounter.incrementCounter).not.toHaveBeenCalled();
});
test('recovers gracefully when the Alert Task Runner throws an exception when license is higher than supported', async () => {
@ -3103,6 +3126,7 @@ describe('Task Runner', () => {
],
]
`);
expect(mockUsageCounter.incrementCounter).not.toHaveBeenCalled();
});
test('recovers gracefully when the Alert Task Runner throws an exception when getting internal Services', async () => {
@ -3246,6 +3270,7 @@ describe('Task Runner', () => {
],
]
`);
expect(mockUsageCounter.incrementCounter).not.toHaveBeenCalled();
});
test('recovers gracefully when the Alert Task Runner throws an exception when fetching attributes', async () => {
@ -3388,6 +3413,7 @@ describe('Task Runner', () => {
],
]
`);
expect(mockUsageCounter.incrementCounter).not.toHaveBeenCalled();
});
test('recovers gracefully when the Runner of a legacy Alert task which has no schedule throws an exception when fetching attributes', async () => {
@ -3438,6 +3464,7 @@ describe('Task Runner', () => {
"state": Object {},
}
`);
expect(mockUsageCounter.incrementCounter).not.toHaveBeenCalled();
});
test(`doesn't change previousStartedAt when it fails to run`, async () => {
@ -3484,6 +3511,7 @@ describe('Task Runner', () => {
expect(runnerResult.state.previousStartedAt).toEqual(
new Date(originalAlertSate.previousStartedAt)
);
expect(mockUsageCounter.incrementCounter).not.toHaveBeenCalled();
});
test('avoids rescheduling a failed Alert Task Runner when it throws due to failing to fetch the alert', async () => {
@ -3525,6 +3553,7 @@ describe('Task Runner', () => {
`Unable to execute rule "1" in the "foo" space because Saved object [alert/1] not found - this rule will not be rescheduled. To restart rule execution, try disabling and re-enabling this rule.`
);
expect(isUnrecoverableError(ex)).toBeTruthy();
expect(mockUsageCounter.incrementCounter).not.toHaveBeenCalled();
});
});
@ -3566,6 +3595,7 @@ describe('Task Runner', () => {
1,
`Unable to execute rule "1" in the "test space" space because Saved object [alert/1] not found - this rule will not be rescheduled. To restart rule execution, try disabling and re-enabling this rule.`
);
expect(mockUsageCounter.incrementCounter).not.toHaveBeenCalled();
});
});
@ -3877,6 +3907,7 @@ describe('Task Runner', () => {
],
]
`);
expect(mockUsageCounter.incrementCounter).not.toHaveBeenCalled();
});
test('duration is updated for active alerts when alert state contains start time', async () => {
@ -4118,6 +4149,7 @@ describe('Task Runner', () => {
],
]
`);
expect(mockUsageCounter.incrementCounter).not.toHaveBeenCalled();
});
test('duration is not calculated for active alerts when alert state does not contain start time', async () => {
@ -4347,6 +4379,7 @@ describe('Task Runner', () => {
],
]
`);
expect(mockUsageCounter.incrementCounter).not.toHaveBeenCalled();
});
test('end is logged for active alerts when alert state contains start time and alert recovers', async () => {
@ -4575,6 +4608,7 @@ describe('Task Runner', () => {
],
]
`);
expect(mockUsageCounter.incrementCounter).not.toHaveBeenCalled();
});
test('end calculation is skipped for active alerts when alert state does not contain start time and alert recovers', async () => {
@ -4799,6 +4833,7 @@ describe('Task Runner', () => {
],
]
`);
expect(mockUsageCounter.incrementCounter).not.toHaveBeenCalled();
});
test('successfully executes the task with ephemeral tasks enabled', async () => {
@ -4989,6 +5024,7 @@ describe('Task Runner', () => {
},
{ refresh: false, namespace: undefined }
);
expect(mockUsageCounter.incrementCounter).not.toHaveBeenCalled();
});
test('successfully bails on execution if the rule is disabled', async () => {
@ -5083,6 +5119,7 @@ describe('Task Runner', () => {
},
message: 'test:1: execution failed',
});
expect(mockUsageCounter.incrementCounter).not.toHaveBeenCalled();
});
test('successfully stores successful runs', async () => {


@ -8,6 +8,7 @@ import apm from 'elastic-apm-node';
import type { PublicMethodsOf } from '@kbn/utility-types';
import { Dictionary, pickBy, mapValues, without, cloneDeep } from 'lodash';
import type { Request } from '@hapi/hapi';
import { UsageCounter } from 'src/plugins/usage_collection/server';
import uuid from 'uuid';
import { addSpaceIdToPath } from '../../../spaces/server';
import { Logger, KibanaRequest } from '../../../../../src/core/server';
@ -109,6 +110,7 @@ export class TaskRunner<
>;
private readonly executionId: string;
private readonly ruleTypeRegistry: RuleTypeRegistry;
private usageCounter?: UsageCounter;
private searchAbortController: AbortController;
private cancelled: boolean;
@ -127,6 +129,7 @@ export class TaskRunner<
) {
this.context = context;
this.logger = context.logger;
this.usageCounter = context.usageCounter;
this.ruleType = ruleType;
this.ruleName = null;
this.taskInstance = taskInstanceToAlertTaskInstance(taskInstance);
@ -256,6 +259,18 @@ export class TaskRunner<
return !this.context.cancelAlertsOnRuleTimeout || !this.ruleType.cancelAlertsOnRuleTimeout;
}
private countUsageOfActionExecutionAfterRuleCancellation() {
if (this.cancelled && this.usageCounter) {
if (this.context.cancelAlertsOnRuleTimeout && this.ruleType.cancelAlertsOnRuleTimeout) {
// Increment usage counter for skipped actions
this.usageCounter.incrementCounter({
counterName: `alertsSkippedDueToRuleExecutionTimeout_${this.ruleType.id}`,
incrementBy: 1,
});
}
}
}
async executeAlert(
alertId: string,
alert: AlertInstance<InstanceState, InstanceContext>,
@ -378,6 +393,7 @@ export class TaskRunner<
event.error.message = err.message;
event.event = event.event || {};
event.event.outcome = 'failure';
throw new ErrorWithReason(AlertExecutionStatusErrorReasons.Execute, err);
}
@ -483,6 +499,12 @@ export class TaskRunner<
this.logger.debug(
`no scheduling of actions for rule ${ruleLabel}: rule execution has been cancelled.`
);
// Usage counter for telemetry
// This tracks how many times action scheduling was skipped because the rule execution
// completed successfully only after the execution timeout had already fired.
// This can occur when rule executors do not short-circuit execution in response
// to the timeout.
this.countUsageOfActionExecutionAfterRuleCancellation();
}
}


@ -6,6 +6,7 @@
*/
import sinon from 'sinon';
import { usageCountersServiceMock } from 'src/plugins/usage_collection/server/usage_counters/usage_counters_service.mock';
import {
AlertExecutorOptions,
AlertTypeParams,
@ -52,6 +53,9 @@ const ruleType: jest.Mocked<UntypedNormalizedRuleType> = {
let fakeTimer: sinon.SinonFakeTimers;
const mockUsageCountersSetup = usageCountersServiceMock.createSetupContract();
const mockUsageCounter = mockUsageCountersSetup.createUsageCounter('test');
describe('Task Runner Cancel', () => {
let mockedTaskInstance: ConcreteTaskInstance;
@ -106,6 +110,7 @@ describe('Task Runner Cancel', () => {
supportsEphemeralTasks: false,
maxEphemeralActionsPerRule: 10,
cancelAlertsOnRuleTimeout: true,
usageCounter: mockUsageCounter,
};
const mockDate = new Date('2019-02-12T21:01:22.479Z');
@ -333,6 +338,11 @@ describe('Task Runner Cancel', () => {
},
{ refresh: false, namespace: undefined }
);
expect(mockUsageCounter.incrementCounter).toHaveBeenCalledTimes(1);
expect(mockUsageCounter.incrementCounter).toHaveBeenCalledWith({
counterName: 'alertsSkippedDueToRuleExecutionTimeout_test',
incrementBy: 1,
});
});
test('actionsPlugin.execute is called if rule execution is cancelled but cancelAlertsOnRuleTimeout from config is false', async () => {
@ -361,6 +371,8 @@ describe('Task Runner Cancel', () => {
await promise;
testActionsExecute();
expect(mockUsageCounter.incrementCounter).not.toHaveBeenCalled();
});
test('actionsPlugin.execute is called if rule execution is cancelled but cancelAlertsOnRuleTimeout for ruleType is false', async () => {
@ -397,6 +409,8 @@ describe('Task Runner Cancel', () => {
await promise;
testActionsExecute();
expect(mockUsageCounter.incrementCounter).not.toHaveBeenCalled();
});
test('actionsPlugin.execute is skipped if rule execution is cancelled and cancelAlertsOnRuleTimeout for both config and ruleType are true', async () => {
@ -563,6 +577,12 @@ describe('Task Runner Cancel', () => {
ruleset: 'alerts',
},
});
expect(mockUsageCounter.incrementCounter).toHaveBeenCalledTimes(1);
expect(mockUsageCounter.incrementCounter).toHaveBeenCalledWith({
counterName: 'alertsSkippedDueToRuleExecutionTimeout_test',
incrementBy: 1,
});
});
function testActionsExecute() {


@ -6,6 +6,7 @@
*/
import sinon from 'sinon';
import { usageCountersServiceMock } from 'src/plugins/usage_collection/server/usage_counters/usage_counters_service.mock';
import { ConcreteTaskInstance, TaskStatus } from '../../../task_manager/server';
import { TaskRunnerContext, TaskRunnerFactory } from './task_runner_factory';
import { encryptedSavedObjectsMock } from '../../../encrypted_saved_objects/server/mocks';
@ -22,7 +23,8 @@ import { ruleTypeRegistryMock } from '../rule_type_registry.mock';
import { executionContextServiceMock } from '../../../../../src/core/server/mocks';
const executionContext = executionContextServiceMock.createSetupContract();
const mockUsageCountersSetup = usageCountersServiceMock.createSetupContract();
const mockUsageCounter = mockUsageCountersSetup.createUsageCounter('test');
const ruleType: UntypedNormalizedRuleType = {
id: 'test',
name: 'My test alert',
@ -86,6 +88,7 @@ describe('Task Runner Factory', () => {
maxEphemeralActionsPerRule: 10,
cancelAlertsOnRuleTimeout: true,
executionContext,
usageCounter: mockUsageCounter,
};
beforeEach(() => {


@ -6,6 +6,7 @@
*/
import type { PublicMethodsOf } from '@kbn/utility-types';
import { UsageCounter } from 'src/plugins/usage_collection/server';
import type {
Logger,
KibanaRequest,
@ -46,6 +47,7 @@ export interface TaskRunnerContext {
supportsEphemeralTasks: boolean;
maxEphemeralActionsPerRule: number;
cancelAlertsOnRuleTimeout: boolean;
usageCounter?: UsageCounter;
}
export class TaskRunnerFactory {


@ -5,22 +5,25 @@
* 2.0.
*/
/* eslint-disable @typescript-eslint/naming-convention */
// eslint-disable-next-line @kbn/eslint/no-restricted-paths
import { elasticsearchClientMock } from '../../../../../src/core/server/elasticsearch/client/mocks';
import {
getTotalCountAggregations,
getTotalCountInUse,
getExecutionsPerDayCount,
} from './alerts_telemetry';
getExecutionTimeoutsPerDayCount,
} from './alerting_telemetry';
describe('alerts telemetry', () => {
test('getTotalCountInUse should replace first "." symbol to "__" in alert types names', async () => {
describe('alerting telemetry', () => {
test('getTotalCountInUse should replace "." symbols with "__" in rule types names', async () => {
const mockEsClient = elasticsearchClientMock.createClusterClient().asScoped().asInternalUser;
mockEsClient.search.mockReturnValue(
// @ts-expect-error @elastic/elasticsearch Aggregate only allows unknown values
elasticsearchClientMock.createSuccessTransportRequestPromise({
aggregations: {
byAlertTypeId: {
byRuleTypeId: {
value: {
ruleTypes: {
'.index-threshold': 2,
@ -47,8 +50,8 @@ describe('alerts telemetry', () => {
Object {
"countByType": Object {
"__index-threshold": 2,
"document.test__": 1,
"logs.alert.document.count": 1,
"document__test__": 1,
"logs__alert__document__count": 1,
},
"countNamespaces": 1,
"countTotal": 4,
@ -62,7 +65,7 @@ Object {
// @ts-expect-error @elastic/elasticsearch Aggregate only allows unknown values
elasticsearchClientMock.createSuccessTransportRequestPromise({
aggregations: {
byAlertTypeId: {
byRuleTypeId: {
value: {
ruleTypes: {
'.index-threshold': 2,
@ -100,8 +103,8 @@ Object {
},
"count_by_type": Object {
"__index-threshold": 2,
"document.test__": 1,
"logs.alert.document.count": 1,
"document__test__": 1,
"logs__alert__document__count": 1,
},
"count_rules_namespaces": 0,
"count_total": 4,
@ -129,7 +132,7 @@ Object {
`);
});
test('getTotalExecutionsCount should return execution aggregations for total count, count by rule type and number of failed executions', async () => {
test('getExecutionsPerDayCount should return execution aggregations for total count, count by rule type and number of failed executions', async () => {
const mockEsClient = elasticsearchClientMock.createClusterClient().asScoped().asInternalUser;
mockEsClient.search.mockReturnValue(
// @ts-expect-error @elastic/elasticsearch Aggregate only allows unknown values
@ -176,26 +179,62 @@ Object {
avgExecutionTime: 0,
avgExecutionTimeByType: {
'__index-threshold': 1043934,
'document.test__': 17687687,
'logs.alert.document.count': 1675765,
document__test__: 17687687,
logs__alert__document__count: 1675765,
},
countByType: {
'__index-threshold': 2,
'document.test__': 1,
'logs.alert.document.count': 1,
document__test__: 1,
logs__alert__document__count: 1,
},
countFailuresByReason: {
unknown: 4,
},
countFailuresByReasonByType: {
unknown: {
'.index-threshold': 2,
'document.test.': 1,
'logs.alert.document.count': 1,
'__index-threshold': 2,
document__test__: 1,
logs__alert__document__count: 1,
},
},
countTotal: 4,
countTotalFailures: 4,
});
});
test('getExecutionTimeoutsPerDayCount should return execution aggregations for total timeout count and count by rule type', async () => {
const mockEsClient = elasticsearchClientMock.createClusterClient().asScoped().asInternalUser;
mockEsClient.search.mockReturnValue(
// @ts-expect-error @elastic/elasticsearch Aggregate only allows unknown values
elasticsearchClientMock.createSuccessTransportRequestPromise({
aggregations: {
byRuleTypeId: {
value: {
ruleTypes: {
'.index-threshold': 2,
'logs.alert.document.count': 1,
'document.test.': 1,
},
},
},
},
hits: {
hits: [],
},
})
);
const telemetry = await getExecutionTimeoutsPerDayCount(mockEsClient, 'test');
expect(mockEsClient.search).toHaveBeenCalledTimes(1);
expect(telemetry).toStrictEqual({
countTotal: 4,
countByType: {
'__index-threshold': 2,
document__test__: 1,
logs__alert__document__count: 1,
},
});
});
});


@ -6,15 +6,15 @@
*/
import { ElasticsearchClient } from 'kibana/server';
import { AlertsUsage } from './types';
import { AlertingUsage } from './types';
const alertTypeMetric = {
const ruleTypeMetric = {
scripted_metric: {
init_script: 'state.ruleTypes = [:]; state.namespaces = [:]',
map_script: `
String alertType = doc['alert.alertTypeId'].value;
String ruleType = doc['alert.alertTypeId'].value;
String namespace = doc['namespaces'] !== null && doc['namespaces'].size() > 0 ? doc['namespaces'].value : 'default';
state.ruleTypes.put(alertType, state.ruleTypes.containsKey(alertType) ? state.ruleTypes.get(alertType) + 1 : 1);
state.ruleTypes.put(ruleType, state.ruleTypes.containsKey(ruleType) ? state.ruleTypes.get(ruleType) + 1 : 1);
if (state.namespaces.containsKey(namespace) === false) {
state.namespaces.put(namespace, 1);
}
@ -38,7 +38,7 @@ const alertTypeMetric = {
},
};
const ruleTypeExecutionsMetric = {
const ruleTypeExecutionsWithDurationMetric = {
scripted_metric: {
init_script: 'state.ruleTypes = [:]; state.ruleTypesDuration = [:];',
map_script: `
@ -66,6 +66,32 @@ const ruleTypeExecutionsMetric = {
},
};
const ruleTypeExecutionsMetric = {
scripted_metric: {
init_script: 'state.ruleTypes = [:]',
map_script: `
String ruleType = doc['rule.category'].value;
state.ruleTypes.put(ruleType, state.ruleTypes.containsKey(ruleType) ? state.ruleTypes.get(ruleType) + 1 : 1);
`,
// Combine script is executed once per shard, but we already have a key-value pair per shard.
// Despite docs that say this is optional, this script can't be blank.
combine_script: 'return state',
// Reduce script is executed across all shards, so we need to add up the totals from each shard
// This also needs to account for having no data
reduce_script: `
Map result = [:];
for (Map m : states.toArray()) {
if (m !== null) {
for (String k : m.keySet()) {
result.put(k, result.containsKey(k) ? result.get(k) + m.get(k) : m.get(k));
}
}
}
return result;
`,
},
};
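
For readers less familiar with Painless, a hedged TypeScript analogue of the reduce_script above; it mirrors the merge of per-shard rule-type counts and is illustrative only, not code from this commit.

// Illustrative TypeScript analogue of the Painless reduce_script: merge per-shard maps of
// rule type id -> count into a single map, skipping shards that returned no state.
function reduceRuleTypeCounts(states: Array<Record<string, number> | null>): Record<string, number> {
  const result: Record<string, number> = {};
  for (const m of states) {
    if (m !== null) {
      for (const [ruleType, count] of Object.entries(m)) {
        result[ruleType] = (result[ruleType] ?? 0) + count;
      }
    }
  }
  return result;
}
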
const ruleTypeFailureExecutionsMetric = {
scripted_metric: {
init_script: 'state.reasons = [:]',
@ -99,10 +125,10 @@ const ruleTypeFailureExecutionsMetric = {
export async function getTotalCountAggregations(
esClient: ElasticsearchClient,
kibanaInex: string
kibanaIndex: string
): Promise<
Pick<
AlertsUsage,
AlertingUsage,
| 'count_total'
| 'count_by_type'
| 'throttle_time'
@ -114,7 +140,7 @@ export async function getTotalCountAggregations(
>
> {
const { body: results } = await esClient.search({
index: kibanaInex,
index: kibanaIndex,
body: {
size: 0,
query: {
@ -210,7 +236,7 @@ export async function getTotalCountAggregations(
},
},
aggs: {
byAlertTypeId: alertTypeMetric,
byRuleTypeId: ruleTypeMetric,
max_throttle_time: { max: { field: 'alert_throttle' } },
min_throttle_time: { min: { field: 'alert_throttle' } },
avg_throttle_time: { avg: { field: 'alert_throttle' } },
@ -225,7 +251,7 @@ export async function getTotalCountAggregations(
});
const aggregations = results.aggregations as {
byAlertTypeId: { value: { ruleTypes: Record<string, string> } };
byRuleTypeId: { value: { ruleTypes: Record<string, string> } };
max_throttle_time: { value: number };
min_throttle_time: { value: number };
avg_throttle_time: { value: number };
@ -237,23 +263,15 @@ export async function getTotalCountAggregations(
avg_actions_count: { value: number };
};
const totalAlertsCount = Object.keys(aggregations.byAlertTypeId.value.ruleTypes).reduce(
const totalRulesCount = Object.keys(aggregations.byRuleTypeId.value.ruleTypes).reduce(
(total: number, key: string) =>
parseInt(aggregations.byAlertTypeId.value.ruleTypes[key], 10) + total,
parseInt(aggregations.byRuleTypeId.value.ruleTypes[key], 10) + total,
0
);
return {
count_total: totalAlertsCount,
count_by_type: Object.keys(aggregations.byAlertTypeId.value.ruleTypes).reduce(
// ES DSL aggregations are returned as `any` by esClient.search
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(obj: any, key: string) => ({
...obj,
[replaceFirstAndLastDotSymbols(key)]: aggregations.byAlertTypeId.value.ruleTypes[key],
}),
{}
),
count_total: totalRulesCount,
count_by_type: replaceDotSymbolsInRuleTypeIds(aggregations.byRuleTypeId.value.ruleTypes),
throttle_time: {
min: `${aggregations.min_throttle_time.value}s`,
avg: `${aggregations.avg_throttle_time.value}s`,
@ -283,9 +301,9 @@ export async function getTotalCountAggregations(
};
}
export async function getTotalCountInUse(esClient: ElasticsearchClient, kibanaInex: string) {
export async function getTotalCountInUse(esClient: ElasticsearchClient, kibanaIndex: string) {
const { body: searchResult } = await esClient.search({
index: kibanaInex,
index: kibanaIndex,
size: 0,
body: {
query: {
@ -294,43 +312,28 @@ export async function getTotalCountInUse(esClient: ElasticsearchClient, kibanaIn
},
},
aggs: {
byAlertTypeId: alertTypeMetric,
byRuleTypeId: ruleTypeMetric,
},
},
});
const aggregations = searchResult.aggregations as {
byAlertTypeId: {
byRuleTypeId: {
value: { ruleTypes: Record<string, string>; namespaces: Record<string, string> };
};
};
return {
countTotal: Object.keys(aggregations.byAlertTypeId.value.ruleTypes).reduce(
countTotal: Object.keys(aggregations.byRuleTypeId.value.ruleTypes).reduce(
(total: number, key: string) =>
parseInt(aggregations.byAlertTypeId.value.ruleTypes[key], 10) + total,
parseInt(aggregations.byRuleTypeId.value.ruleTypes[key], 10) + total,
0
),
countByType: Object.keys(aggregations.byAlertTypeId.value.ruleTypes).reduce(
// ES DSL aggregations are returned as `any` by esClient.search
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(obj: any, key: string) => ({
...obj,
[replaceFirstAndLastDotSymbols(key)]: aggregations.byAlertTypeId.value.ruleTypes[key],
}),
{}
),
countNamespaces: Object.keys(aggregations.byAlertTypeId.value.namespaces).length,
countByType: replaceDotSymbolsInRuleTypeIds(aggregations.byRuleTypeId.value.ruleTypes),
countNamespaces: Object.keys(aggregations.byRuleTypeId.value.namespaces).length,
};
}
function replaceFirstAndLastDotSymbols(strToReplace: string) {
const hasFirstSymbolDot = strToReplace.startsWith('.');
const appliedString = hasFirstSymbolDot ? strToReplace.replace('.', '__') : strToReplace;
const hasLastSymbolDot = strToReplace.endsWith('.');
return hasLastSymbolDot ? `${appliedString.slice(0, -1)}__` : appliedString;
}
export async function getExecutionsPerDayCount(
esClient: ElasticsearchClient,
eventLogIndex: string
@ -363,7 +366,7 @@ export async function getExecutionsPerDayCount(
},
},
aggs: {
byRuleTypeId: ruleTypeExecutionsMetric,
byRuleTypeId: ruleTypeExecutionsWithDurationMetric,
failuresByReason: ruleTypeFailureExecutionsMetric,
avgDuration: { avg: { field: 'event.duration' } },
},
@ -392,15 +395,8 @@ export async function getExecutionsPerDayCount(
parseInt(executionsAggregations.byRuleTypeId.value.ruleTypes[key], 10) + total,
0
),
countByType: Object.keys(executionsAggregations.byRuleTypeId.value.ruleTypes).reduce(
// ES DSL aggregations are returned as `any` by esClient.search
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(obj: any, key: string) => ({
...obj,
[replaceFirstAndLastDotSymbols(key)]:
executionsAggregations.byRuleTypeId.value.ruleTypes[key],
}),
{}
countByType: replaceDotSymbolsInRuleTypeIds(
executionsAggregations.byRuleTypeId.value.ruleTypes
),
countTotalFailures: Object.keys(
executionFailuresAggregations.failuresByReason.value.reasons
@ -426,7 +422,7 @@ export async function getExecutionsPerDayCount(
);
return {
...obj,
[replaceFirstAndLastDotSymbols(reason)]: countByRuleTypes,
[replaceDotSymbols(reason)]: countByRuleTypes,
};
},
{}
@ -438,8 +434,9 @@ export async function getExecutionsPerDayCount(
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(obj: any, key: string) => ({
...obj,
[replaceFirstAndLastDotSymbols(key)]:
executionFailuresAggregations.failuresByReason.value.reasons[key],
[key]: replaceDotSymbolsInRuleTypeIds(
executionFailuresAggregations.failuresByReason.value.reasons[key]
),
}),
{}
),
@ -449,7 +446,7 @@ export async function getExecutionsPerDayCount(
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(obj: any, key: string) => ({
...obj,
[replaceFirstAndLastDotSymbols(key)]: Math.round(
[replaceDotSymbols(key)]: Math.round(
executionsAggregations.byRuleTypeId.value.ruleTypesDuration[key] /
parseInt(executionsAggregations.byRuleTypeId.value.ruleTypes[key], 10)
),
@ -458,3 +455,69 @@ export async function getExecutionsPerDayCount(
),
};
}
export async function getExecutionTimeoutsPerDayCount(
esClient: ElasticsearchClient,
eventLogIndex: string
) {
const { body: searchResult } = await esClient.search({
index: eventLogIndex,
size: 0,
body: {
query: {
bool: {
filter: {
bool: {
must: [
{
term: { 'event.action': 'execute-timeout' },
},
{
term: { 'event.provider': 'alerting' },
},
{
range: {
'@timestamp': {
gte: 'now-1d',
},
},
},
],
},
},
},
},
aggs: {
byRuleTypeId: ruleTypeExecutionsMetric,
},
},
});
const executionsAggregations = searchResult.aggregations as {
byRuleTypeId: {
value: { ruleTypes: Record<string, string>; ruleTypesDuration: Record<string, number> };
};
};
return {
countTotal: Object.keys(executionsAggregations.byRuleTypeId.value.ruleTypes).reduce(
(total: number, key: string) =>
parseInt(executionsAggregations.byRuleTypeId.value.ruleTypes[key], 10) + total,
0
),
countByType: replaceDotSymbolsInRuleTypeIds(
executionsAggregations.byRuleTypeId.value.ruleTypes
),
};
}
function replaceDotSymbols(strToReplace: string) {
return strToReplace.replaceAll('.', '__');
}
function replaceDotSymbolsInRuleTypeIds(ruleTypeIdObj: Record<string, string>) {
return Object.keys(ruleTypeIdObj).reduce(
(obj, key) => ({ ...obj, [replaceDotSymbols(key)]: ruleTypeIdObj[key] }),
{}
);
}
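
A quick standalone check of the normalization helpers above (a hedged sketch that re-declares replaceDotSymbols locally so it runs on its own; it assumes an ES2021+ runtime for String.prototype.replaceAll):

// Local copy of the helper for illustration: rule type ids are normalized so telemetry keys
// contain no dots (dots would otherwise read as nested fields in the mappings).
function replaceDotSymbols(strToReplace: string): string {
  return strToReplace.replaceAll('.', '__');
}

// Matches the expectations in the tests above:
console.log(replaceDotSymbols('logs.alert.document.count')); // logs__alert__document__count
console.log(replaceDotSymbols('.index-threshold')); // __index-threshold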


@ -6,13 +6,13 @@
*/
import { UsageCollectionSetup } from 'src/plugins/usage_collection/server';
import { registerAlertsUsageCollector } from './alerts_usage_collector';
import { registerAlertingUsageCollector } from './alerting_usage_collector';
import { taskManagerMock } from '../../../task_manager/server/mocks';
const taskManagerStart = taskManagerMock.createStart();
beforeEach(() => jest.resetAllMocks());
describe('registerAlertsUsageCollector', () => {
describe('registerAlertingUsageCollector', () => {
let usageCollectionMock: jest.Mocked<UsageCollectionSetup>;
beforeEach(() => {
@ -23,7 +23,7 @@ describe('registerAlertsUsageCollector', () => {
});
it('should call registerCollector', () => {
registerAlertsUsageCollector(
registerAlertingUsageCollector(
usageCollectionMock as UsageCollectionSetup,
new Promise(() => taskManagerStart)
);
@ -31,7 +31,7 @@ describe('registerAlertsUsageCollector', () => {
});
it('should call makeUsageCollector with type = alerts', () => {
registerAlertsUsageCollector(
registerAlertingUsageCollector(
usageCollectionMock as UsageCollectionSetup,
new Promise(() => taskManagerStart)
);


@ -8,12 +8,12 @@
import { MakeSchemaFrom, UsageCollectionSetup } from 'src/plugins/usage_collection/server';
import { get } from 'lodash';
import { TaskManagerStartContract } from '../../../task_manager/server';
import { AlertsUsage } from './types';
import { AlertingUsage } from './types';
const byTypeSchema: MakeSchemaFrom<AlertsUsage>['count_by_type'] = {
const byTypeSchema: MakeSchemaFrom<AlertingUsage>['count_by_type'] = {
// TODO: Find out an automated way to populate the keys or reformat these into an array (and change the Remote Telemetry indexer accordingly)
DYNAMIC_KEY: { type: 'long' },
// Known alerts (searching the use of the alerts API `registerType`:
// Known rule types (found by searching for uses of the rules API `registerType`):
// Built-in
'__index-threshold': { type: 'long' },
'__es-query': { type: 'long' },
@ -39,6 +39,12 @@ const byTypeSchema: MakeSchemaFrom<AlertsUsage>['count_by_type'] = {
// Security Solution
siem__signals: { type: 'long' }, // eslint-disable-line @typescript-eslint/naming-convention
siem__notifications: { type: 'long' }, // eslint-disable-line @typescript-eslint/naming-convention
siem__eqlRule: { type: 'long' }, // eslint-disable-line @typescript-eslint/naming-convention
siem__indicatorRule: { type: 'long' }, // eslint-disable-line @typescript-eslint/naming-convention
siem__mlRule: { type: 'long' }, // eslint-disable-line @typescript-eslint/naming-convention
siem__queryRule: { type: 'long' }, // eslint-disable-line @typescript-eslint/naming-convention
siem__savedQueryRule: { type: 'long' }, // eslint-disable-line @typescript-eslint/naming-convention
siem__thresholdRule: { type: 'long' }, // eslint-disable-line @typescript-eslint/naming-convention
// Uptime
xpack__uptime__alerts__monitorStatus: { type: 'long' }, // eslint-disable-line @typescript-eslint/naming-convention
xpack__uptime__alerts__tls: { type: 'long' }, // eslint-disable-line @typescript-eslint/naming-convention
@ -50,7 +56,7 @@ const byTypeSchema: MakeSchemaFrom<AlertsUsage>['count_by_type'] = {
xpack__ml__anomaly_detection_jobs_health: { type: 'long' }, // eslint-disable-line @typescript-eslint/naming-convention
};
const byReasonSchema: MakeSchemaFrom<AlertsUsage>['count_rules_executions_failured_by_reason_per_day'] =
const byReasonSchema: MakeSchemaFrom<AlertingUsage>['count_rules_executions_failured_by_reason_per_day'] =
{
// TODO: Find out an automated way to populate the keys or reformat these into an array (and change the Remote Telemetry indexer accordingly)
DYNAMIC_KEY: { type: 'long' },
@ -60,7 +66,7 @@ const byReasonSchema: MakeSchemaFrom<AlertsUsage>['count_rules_executions_failur
unknown: { type: 'long' },
};
const byReasonSchemaByType: MakeSchemaFrom<AlertsUsage>['count_rules_executions_failured_by_reason_by_type_per_day'] =
const byReasonSchemaByType: MakeSchemaFrom<AlertingUsage>['count_rules_executions_failured_by_reason_by_type_per_day'] =
{
// TODO: Find out an automated way to populate the keys or reformat these into an array (and change the Remote Telemetry indexer accordingly)
DYNAMIC_KEY: byTypeSchema,
@ -70,11 +76,11 @@ const byReasonSchemaByType: MakeSchemaFrom<AlertsUsage>['count_rules_executions_
unknown: byTypeSchema,
};
export function createAlertsUsageCollector(
export function createAlertingUsageCollector(
usageCollection: UsageCollectionSetup,
taskManager: Promise<TaskManagerStartContract>
) {
return usageCollection.makeUsageCollector<AlertsUsage>({
return usageCollection.makeUsageCollector<AlertingUsage>({
type: 'alerts',
isReady: async () => {
await taskManager;
@ -84,7 +90,7 @@ export function createAlertsUsageCollector(
try {
const doc = await getLatestTaskState(await taskManager);
// get the accumulated state from the recurring task
const { runs, ...state } = get(doc, 'state') as AlertsUsage & { runs: number };
const { runs, ...state } = get(doc, 'state') as AlertingUsage & { runs: number };
return {
...state,
@ -127,6 +133,8 @@ export function createAlertsUsageCollector(
count_rules_executions_failured_per_day: 0,
count_rules_executions_failured_by_reason_per_day: {},
count_rules_executions_failured_by_reason_by_type_per_day: {},
count_rules_executions_timeouts_per_day: 0,
count_rules_executions_timeouts_by_type_per_day: {},
avg_execution_time_per_day: 0,
avg_execution_time_by_type_per_day: {},
};
@ -169,6 +177,8 @@ export function createAlertsUsageCollector(
count_rules_executions_failured_per_day: { type: 'long' },
count_rules_executions_failured_by_reason_per_day: byReasonSchema,
count_rules_executions_failured_by_reason_by_type_per_day: byReasonSchemaByType,
count_rules_executions_timeouts_per_day: { type: 'long' },
count_rules_executions_timeouts_by_type_per_day: byTypeSchema,
avg_execution_time_per_day: { type: 'long' },
avg_execution_time_by_type_per_day: byTypeSchema,
},
@ -194,10 +204,10 @@ async function getLatestTaskState(taskManager: TaskManagerStartContract) {
return null;
}
export function registerAlertsUsageCollector(
export function registerAlertingUsageCollector(
usageCollection: UsageCollectionSetup,
taskManager: Promise<TaskManagerStartContract>
) {
const collector = createAlertsUsageCollector(usageCollection, taskManager);
const collector = createAlertingUsageCollector(usageCollection, taskManager);
usageCollection.registerCollector(collector);
}


@ -5,4 +5,4 @@
* 2.0.
*/
export { registerAlertsUsageCollector } from './alerts_usage_collector';
export { registerAlertingUsageCollector } from './alerting_usage_collector';


@ -17,7 +17,8 @@ import {
getTotalCountAggregations,
getTotalCountInUse,
getExecutionsPerDayCount,
} from './alerts_telemetry';
getExecutionTimeoutsPerDayCount,
} from './alerting_telemetry';
export const TELEMETRY_TASK_TYPE = 'alerting_telemetry';
@ -92,29 +93,40 @@ export function telemetryTaskRunner(
getTotalCountAggregations(esClient, kibanaIndex),
getTotalCountInUse(esClient, kibanaIndex),
getExecutionsPerDayCount(esClient, eventLogIndex),
getExecutionTimeoutsPerDayCount(esClient, eventLogIndex),
])
.then(([totalCountAggregations, totalInUse, totalExecutions]) => {
return {
state: {
runs: (state.runs || 0) + 1,
...totalCountAggregations,
count_active_by_type: totalInUse.countByType,
count_active_total: totalInUse.countTotal,
count_disabled_total: totalCountAggregations.count_total - totalInUse.countTotal,
count_rules_namespaces: totalInUse.countNamespaces,
count_rules_executions_per_day: totalExecutions.countTotal,
count_rules_executions_by_type_per_day: totalExecutions.countByType,
count_rules_executions_failured_per_day: totalExecutions.countTotalFailures,
count_rules_executions_failured_by_reason_per_day:
totalExecutions.countFailuresByReason,
count_rules_executions_failured_by_reason_by_type_per_day:
totalExecutions.countFailuresByReasonByType,
avg_execution_time_per_day: totalExecutions.avgExecutionTime,
avg_execution_time_by_type_per_day: totalExecutions.avgExecutionTimeByType,
},
runAt: getNextMidnight(),
};
})
.then(
([
totalCountAggregations,
totalInUse,
dailyExecutionCounts,
dailyExecutionTimeoutCounts,
]) => {
return {
state: {
runs: (state.runs || 0) + 1,
...totalCountAggregations,
count_active_by_type: totalInUse.countByType,
count_active_total: totalInUse.countTotal,
count_disabled_total: totalCountAggregations.count_total - totalInUse.countTotal,
count_rules_namespaces: totalInUse.countNamespaces,
count_rules_executions_per_day: dailyExecutionCounts.countTotal,
count_rules_executions_by_type_per_day: dailyExecutionCounts.countByType,
count_rules_executions_failured_per_day: dailyExecutionCounts.countTotalFailures,
count_rules_executions_failured_by_reason_per_day:
dailyExecutionCounts.countFailuresByReason,
count_rules_executions_failured_by_reason_by_type_per_day:
dailyExecutionCounts.countFailuresByReasonByType,
count_rules_executions_timeouts_per_day: dailyExecutionTimeoutCounts.countTotal,
count_rules_executions_timeouts_by_type_per_day:
dailyExecutionTimeoutCounts.countByType,
avg_execution_time_per_day: dailyExecutionCounts.avgExecutionTime,
avg_execution_time_by_type_per_day: dailyExecutionCounts.avgExecutionTimeByType,
},
runAt: getNextMidnight(),
};
}
)
.catch((errMsg) => {
logger.warn(`Error executing alerting telemetry task: ${errMsg}`);
return {


@ -5,7 +5,7 @@
* 2.0.
*/
export interface AlertsUsage {
export interface AlertingUsage {
count_total: number;
count_active_total: number;
count_disabled_total: number;
@ -17,6 +17,8 @@ export interface AlertsUsage {
count_rules_executions_failured_per_day: number;
count_rules_executions_failured_by_reason_per_day: Record<string, number>;
count_rules_executions_failured_by_reason_by_type_per_day: Record<string, Record<string, number>>;
count_rules_executions_timeouts_per_day: number;
count_rules_executions_timeouts_by_type_per_day: Record<string, number>;
avg_execution_time_per_day: number;
avg_execution_time_by_type_per_day: Record<string, number>;
throttle_time: {


@ -400,6 +400,24 @@
"siem__notifications": {
"type": "long"
},
"siem__eqlRule": {
"type": "long"
},
"siem__indicatorRule": {
"type": "long"
},
"siem__mlRule": {
"type": "long"
},
"siem__queryRule": {
"type": "long"
},
"siem__savedQueryRule": {
"type": "long"
},
"siem__thresholdRule": {
"type": "long"
},
"xpack__uptime__alerts__monitorStatus": {
"type": "long"
},
@ -485,6 +503,24 @@
"siem__notifications": {
"type": "long"
},
"siem__eqlRule": {
"type": "long"
},
"siem__indicatorRule": {
"type": "long"
},
"siem__mlRule": {
"type": "long"
},
"siem__queryRule": {
"type": "long"
},
"siem__savedQueryRule": {
"type": "long"
},
"siem__thresholdRule": {
"type": "long"
},
"xpack__uptime__alerts__monitorStatus": {
"type": "long"
},
@ -576,6 +612,24 @@
"siem__notifications": {
"type": "long"
},
"siem__eqlRule": {
"type": "long"
},
"siem__indicatorRule": {
"type": "long"
},
"siem__mlRule": {
"type": "long"
},
"siem__queryRule": {
"type": "long"
},
"siem__savedQueryRule": {
"type": "long"
},
"siem__thresholdRule": {
"type": "long"
},
"xpack__uptime__alerts__monitorStatus": {
"type": "long"
},
@ -685,6 +739,24 @@
"siem__notifications": {
"type": "long"
},
"siem__eqlRule": {
"type": "long"
},
"siem__indicatorRule": {
"type": "long"
},
"siem__mlRule": {
"type": "long"
},
"siem__queryRule": {
"type": "long"
},
"siem__savedQueryRule": {
"type": "long"
},
"siem__thresholdRule": {
"type": "long"
},
"xpack__uptime__alerts__monitorStatus": {
"type": "long"
},
@ -770,6 +842,24 @@
"siem__notifications": {
"type": "long"
},
"siem__eqlRule": {
"type": "long"
},
"siem__indicatorRule": {
"type": "long"
},
"siem__mlRule": {
"type": "long"
},
"siem__queryRule": {
"type": "long"
},
"siem__savedQueryRule": {
"type": "long"
},
"siem__thresholdRule": {
"type": "long"
},
"xpack__uptime__alerts__monitorStatus": {
"type": "long"
},
@ -855,6 +945,24 @@
"siem__notifications": {
"type": "long"
},
"siem__eqlRule": {
"type": "long"
},
"siem__indicatorRule": {
"type": "long"
},
"siem__mlRule": {
"type": "long"
},
"siem__queryRule": {
"type": "long"
},
"siem__savedQueryRule": {
"type": "long"
},
"siem__thresholdRule": {
"type": "long"
},
"xpack__uptime__alerts__monitorStatus": {
"type": "long"
},
@ -940,6 +1048,24 @@
"siem__notifications": {
"type": "long"
},
"siem__eqlRule": {
"type": "long"
},
"siem__indicatorRule": {
"type": "long"
},
"siem__mlRule": {
"type": "long"
},
"siem__queryRule": {
"type": "long"
},
"siem__savedQueryRule": {
"type": "long"
},
"siem__thresholdRule": {
"type": "long"
},
"xpack__uptime__alerts__monitorStatus": {
"type": "long"
},
@ -1025,6 +1151,24 @@
"siem__notifications": {
"type": "long"
},
"siem__eqlRule": {
"type": "long"
},
"siem__indicatorRule": {
"type": "long"
},
"siem__mlRule": {
"type": "long"
},
"siem__queryRule": {
"type": "long"
},
"siem__savedQueryRule": {
"type": "long"
},
"siem__thresholdRule": {
"type": "long"
},
"xpack__uptime__alerts__monitorStatus": {
"type": "long"
},
@ -1047,6 +1191,112 @@
}
}
},
"count_rules_executions_timeouts_per_day": {
"type": "long"
},
"count_rules_executions_timeouts_by_type_per_day": {
"properties": {
"DYNAMIC_KEY": {
"type": "long"
},
"__index-threshold": {
"type": "long"
},
"__es-query": {
"type": "long"
},
"transform_health": {
"type": "long"
},
"apm__error_rate": {
"type": "long"
},
"apm__transaction_error_rate": {
"type": "long"
},
"apm__transaction_duration": {
"type": "long"
},
"apm__transaction_duration_anomaly": {
"type": "long"
},
"metrics__alert__threshold": {
"type": "long"
},
"metrics__alert__inventory__threshold": {
"type": "long"
},
"logs__alert__document__count": {
"type": "long"
},
"monitoring_alert_cluster_health": {
"type": "long"
},
"monitoring_alert_cpu_usage": {
"type": "long"
},
"monitoring_alert_disk_usage": {
"type": "long"
},
"monitoring_alert_elasticsearch_version_mismatch": {
"type": "long"
},
"monitoring_alert_kibana_version_mismatch": {
"type": "long"
},
"monitoring_alert_license_expiration": {
"type": "long"
},
"monitoring_alert_logstash_version_mismatch": {
"type": "long"
},
"monitoring_alert_nodes_changed": {
"type": "long"
},
"siem__signals": {
"type": "long"
},
"siem__notifications": {
"type": "long"
},
"siem__eqlRule": {
"type": "long"
},
"siem__indicatorRule": {
"type": "long"
},
"siem__mlRule": {
"type": "long"
},
"siem__queryRule": {
"type": "long"
},
"siem__savedQueryRule": {
"type": "long"
},
"siem__thresholdRule": {
"type": "long"
},
"xpack__uptime__alerts__monitorStatus": {
"type": "long"
},
"xpack__uptime__alerts__tls": {
"type": "long"
},
"xpack__uptime__alerts__durationAnomaly": {
"type": "long"
},
"__geo-containment": {
"type": "long"
},
"xpack__ml__anomaly_detection_alert": {
"type": "long"
},
"xpack__ml__anomaly_detection_jobs_health": {
"type": "long"
}
}
},
"avg_execution_time_per_day": {
"type": "long"
},
@ -1115,6 +1365,24 @@
"siem__notifications": {
"type": "long"
},
"siem__eqlRule": {
"type": "long"
},
"siem__indicatorRule": {
"type": "long"
},
"siem__mlRule": {
"type": "long"
},
"siem__queryRule": {
"type": "long"
},
"siem__savedQueryRule": {
"type": "long"
},
"siem__thresholdRule": {
"type": "long"
},
"xpack__uptime__alerts__monitorStatus": {
"type": "long"
},