mirror of
https://github.com/elastic/kibana.git
synced 2025-06-27 18:51:07 -04:00
[ResponseOps][MW] Add telemetry for the maintenance window (#192483)
Resolves: https://github.com/elastic/kibana/issues/184088

This PR adds telemetry collection for these maintenance window (MW) metrics:
- total number of MW in deployments
- number of active MW with the "repeat" toggle on (time based)
- number of active MW with the "filter alerts" toggle on (KQL based)

## Testing

Create several MW with different settings (toggles on and off).

To see the changes reflected in the telemetry object, modify this file: `x-pack/plugins/alerting/server/usage/task.ts`

With:
```
async function scheduleTasks(logger: Logger, taskManager: TaskManagerStartContract) {
  try {
    await taskManager.ensureScheduled({
      id: TASK_ID,
      taskType: TELEMETRY_TASK_TYPE,
      state: emptyState,
      params: {},
      schedule: SCHEDULE,
    });
  } catch (e) {
    logger.error(`Error scheduling ${TASK_ID}, received ${e.message}`);
  }
  await taskManager.runSoon(TASK_ID);
}
```
This will cause the telemetry to be sent as soon as the server is restarted.

**Run the Telemetry usage payload API in your browser console to verify the telemetry object:**
https://docs.elastic.dev/telemetry/collection/snapshot-telemetry#telemetry-usage-payload-api

P.S.: Add a space at the beginning of the URL.

### Checklist

- [x] [Unit or functional tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html) were updated or added to match the most common scenarios

---------

Co-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com>
This commit is contained in:
parent
210f5527a0
commit
eabb102281
8 changed files with 364 additions and 13 deletions
|
@ -205,6 +205,9 @@ export function createAlertingUsageCollector(
|
||||||
count_rules_with_tags: 0,
|
count_rules_with_tags: 0,
|
||||||
count_rules_snoozed: 0,
|
count_rules_snoozed: 0,
|
||||||
count_rules_muted: 0,
|
count_rules_muted: 0,
|
||||||
|
count_mw_total: 0,
|
||||||
|
count_mw_with_repeat_toggle_on: 0,
|
||||||
|
count_mw_with_filter_alert_toggle_on: 0,
|
||||||
count_rules_with_muted_alerts: 0,
|
count_rules_with_muted_alerts: 0,
|
||||||
count_connector_types_by_consumers: {},
|
count_connector_types_by_consumers: {},
|
||||||
count_rules_by_execution_status_per_day: {},
|
count_rules_by_execution_status_per_day: {},
|
||||||
|
@ -289,6 +292,9 @@ export function createAlertingUsageCollector(
|
||||||
count_rules_by_notify_when: byNotifyWhenSchema,
|
count_rules_by_notify_when: byNotifyWhenSchema,
|
||||||
count_rules_snoozed: { type: 'long' },
|
count_rules_snoozed: { type: 'long' },
|
||||||
count_rules_muted: { type: 'long' },
|
count_rules_muted: { type: 'long' },
|
||||||
|
count_mw_total: { type: 'long' },
|
||||||
|
count_mw_with_repeat_toggle_on: { type: 'long' },
|
||||||
|
count_mw_with_filter_alert_toggle_on: { type: 'long' },
|
||||||
count_rules_with_muted_alerts: { type: 'long' },
|
count_rules_with_muted_alerts: { type: 'long' },
|
||||||
count_connector_types_by_consumers: { DYNAMIC_KEY: { DYNAMIC_KEY: { type: 'long' } } },
|
count_connector_types_by_consumers: { DYNAMIC_KEY: { DYNAMIC_KEY: { type: 'long' } } },
|
||||||
count_rules_by_execution_status_per_day: byStatusPerDaySchema,
|
count_rules_by_execution_status_per_day: byStatusPerDaySchema,
|
||||||
|
|
|
@ -6,11 +6,97 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { elasticsearchServiceMock, loggingSystemMock } from '@kbn/core/server/mocks';
|
import { elasticsearchServiceMock, loggingSystemMock } from '@kbn/core/server/mocks';
|
||||||
import { getTotalCountAggregations, getTotalCountInUse } from './get_telemetry_from_kibana';
|
import {
|
||||||
|
getTotalCountAggregations,
|
||||||
|
getTotalCountInUse,
|
||||||
|
getMWTelemetry,
|
||||||
|
} from './get_telemetry_from_kibana';
|
||||||
|
import { savedObjectsClientMock } from '@kbn/core/server/mocks';
|
||||||
|
import { MAINTENANCE_WINDOW_SAVED_OBJECT_TYPE } from '../../../common';
|
||||||
|
import { ISavedObjectsRepository } from '@kbn/core/server';
|
||||||
|
|
||||||
const elasticsearch = elasticsearchServiceMock.createStart();
|
const elasticsearch = elasticsearchServiceMock.createStart();
|
||||||
const esClient = elasticsearch.client.asInternalUser;
|
const esClient = elasticsearch.client.asInternalUser;
|
||||||
const logger: ReturnType<typeof loggingSystemMock.createLogger> = loggingSystemMock.createLogger();
|
const logger: ReturnType<typeof loggingSystemMock.createLogger> = loggingSystemMock.createLogger();
|
||||||
|
const savedObjectsClient = savedObjectsClientMock.create() as unknown as ISavedObjectsRepository;
|
||||||
|
const thrownError = new Error('Fail');
|
||||||
|
|
||||||
|
/**
 * Builds one maintenance window saved-object fixture. Every fixture shares the same
 * schedule start, time zone, duration and audit fields; only the id, title, the
 * recurrence part of `rRule` and the `scopedQuery` differ between them.
 */
const buildMockedMW = ({
  id,
  title,
  recurrence,
  scopedQuery = null,
}: {
  id: string;
  title: string;
  recurrence: Record<string, unknown>;
  scopedQuery?: { filters: unknown[]; kql: string; dsl: string } | null;
}) => ({
  id,
  type: MAINTENANCE_WINDOW_SAVED_OBJECT_TYPE,
  attributes: {
    title,
    enabled: true,
    duration: 1800000,
    expirationDate: '2025-09-09T13:13:07.824Z',
    events: [],
    rRule: {
      dtstart: '2024-09-09T13:13:02.054Z',
      tzid: 'Europe/Stockholm',
      ...recurrence,
    },
    createdBy: null,
    updatedBy: null,
    createdAt: '2024-09-09T13:13:07.825Z',
    updatedAt: '2024-09-09T13:13:07.825Z',
    scopedQuery,
  },
});

/**
 * Single page of finder results used by the getMWTelemetry tests:
 *  - '1': rRule without `interval`, no scoped query
 *  - '2': rRule with `interval`, with a scoped query
 *  - '3': rRule with `interval`, no scoped query
 */
const mockedResponse = {
  saved_objects: [
    buildMockedMW({
      id: '1',
      title: 'test_rule_1',
      recurrence: { freq: 0, count: 1 },
    }),
    buildMockedMW({
      id: '2',
      title: 'test_rule_2',
      recurrence: { freq: 3, interval: 1, byweekday: ['SU'] },
      scopedQuery: {
        filters: [],
        kql: 'kibana.alert.job_errors_results.job_id : * ',
        dsl: '{"bool":{"must":[],"filter":[{"bool":{"should":[{"exists":{"field":"kibana.alert.job_errors_results.job_id"}}],"minimum_should_match":1}}],"should":[],"must_not":[]}}',
      },
    }),
    buildMockedMW({
      id: '3',
      title: 'test_rule_3',
      recurrence: { freq: 3, interval: 1, byweekday: ['TU'] },
    }),
  ],
};
|
||||||
|
|
||||||
describe('kibana index telemetry', () => {
|
describe('kibana index telemetry', () => {
|
||||||
beforeEach(() => {
|
beforeEach(() => {
|
||||||
|
@ -420,4 +506,94 @@ describe('kibana index telemetry', () => {
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('getMWTelemetry', () => {
  // Arguments every scenario expects getMWTelemetry to pass to createPointInTimeFinder.
  const expectedFinderOptions = {
    type: MAINTENANCE_WINDOW_SAVED_OBJECT_TYPE,
    namespaces: ['*'],
    perPage: 100,
    fields: ['rRule', 'scopedQuery'],
  };

  // Installs a finder mock whose find() is driven by the given async generator.
  const mockFinder = (find: () => AsyncGenerator<unknown, void, unknown>) => {
    savedObjectsClient.createPointInTimeFinder = jest.fn().mockReturnValue({
      close: jest.fn(),
      find: jest.fn().mockImplementation(find),
    });
  };

  test('should return MW telemetry', async () => {
    mockFinder(async function* () {
      yield mockedResponse;
    });

    const telemetry = await getMWTelemetry({
      savedObjectsClient,
      logger,
    });

    expect(savedObjectsClient.createPointInTimeFinder).toHaveBeenCalledWith(expectedFinderOptions);
    expect(telemetry).toStrictEqual({
      count_mw_total: 3,
      count_mw_with_repeat_toggle_on: 2,
      count_mw_with_filter_alert_toggle_on: 1,
      hasErrors: false,
    });
  });

  // The error is not rethrown by getMWTelemetry: it is reported through the result and the log.
  test('should throw the error', async () => {
    mockFinder(async function* () {
      throw thrownError;
    });

    const telemetry = await getMWTelemetry({
      savedObjectsClient,
      logger,
    });

    expect(savedObjectsClient.createPointInTimeFinder).toHaveBeenCalledWith(expectedFinderOptions);

    expect(telemetry).toStrictEqual({
      count_mw_total: 0,
      count_mw_with_repeat_toggle_on: 0,
      count_mw_with_filter_alert_toggle_on: 0,
      hasErrors: true,
      errorMessage: 'Fail',
    });
    expect(logger.warn).toHaveBeenCalled();
    const loggerCall = logger.warn.mock.calls[0][0];
    const loggerMeta = logger.warn.mock.calls[0][1];
    expect(loggerCall).toBe('Error executing alerting telemetry task: getTotalMWCount - {}');
    expect(loggerMeta?.tags).toEqual(['alerting', 'telemetry-failed']);
    expect(loggerMeta?.error?.stack_trace).toBeDefined();
  });

  test('should stop on MW max limit count', async () => {
    mockFinder(async function* () {
      yield mockedResponse;
    });

    const telemetry = await getMWTelemetry({
      savedObjectsClient,
      logger,
      maxDocuments: 1,
    });

    expect(savedObjectsClient.createPointInTimeFinder).toHaveBeenCalledWith(expectedFinderOptions);
    // With maxDocuments: 1 the first two fixtures are counted before the loop stops.
    expect(telemetry).toStrictEqual({
      count_mw_total: 2,
      count_mw_with_repeat_toggle_on: 1,
      count_mw_with_filter_alert_toggle_on: 1,
      hasErrors: false,
    });
  });
});
|
||||||
});
|
});
|
||||||
|
|
|
@ -11,7 +11,7 @@ import type {
|
||||||
AggregationsTermsAggregateBase,
|
AggregationsTermsAggregateBase,
|
||||||
AggregationsStringTermsBucketKeys,
|
AggregationsStringTermsBucketKeys,
|
||||||
} from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
|
} from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
|
||||||
import { ElasticsearchClient, Logger } from '@kbn/core/server';
|
import { ElasticsearchClient, Logger, ISavedObjectsRepository } from '@kbn/core/server';
|
||||||
|
|
||||||
import {
|
import {
|
||||||
ConnectorsByConsumersBucket,
|
ConnectorsByConsumersBucket,
|
||||||
|
@ -23,6 +23,8 @@ import { AlertingUsage } from '../types';
|
||||||
import { NUM_ALERTING_RULE_TYPES } from '../alerting_usage_collector';
|
import { NUM_ALERTING_RULE_TYPES } from '../alerting_usage_collector';
|
||||||
import { parseSimpleRuleTypeBucket } from './parse_simple_rule_type_bucket';
|
import { parseSimpleRuleTypeBucket } from './parse_simple_rule_type_bucket';
|
||||||
import { groupRulesBySearchType } from './group_rules_by_search_type';
|
import { groupRulesBySearchType } from './group_rules_by_search_type';
|
||||||
|
import { MAINTENANCE_WINDOW_SAVED_OBJECT_TYPE } from '../../../common';
|
||||||
|
import { MaintenanceWindowAttributes } from '../../data/maintenance_window/types';
|
||||||
|
|
||||||
interface Opts {
|
interface Opts {
|
||||||
esClient: ElasticsearchClient;
|
esClient: ElasticsearchClient;
|
||||||
|
@ -30,6 +32,12 @@ interface Opts {
|
||||||
logger: Logger;
|
logger: Logger;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Options accepted by getMWTelemetry. */
interface MWOpts {
  /**
   * Threshold at which getMWTelemetry stops counting maintenance windows;
   * getMWTelemetry falls back to TELEMETRY_MW_COUNT_LIMIT when this is omitted.
   */
  maxDocuments?: number;
  /** Logger that receives a warning when the collection fails. */
  logger: Logger;
  /** Saved objects repository used to create the point-in-time finder. */
  savedObjectsClient: ISavedObjectsRepository;
}
|
||||||
|
|
||||||
type GetTotalCountsResults = Pick<
|
type GetTotalCountsResults = Pick<
|
||||||
AlertingUsage,
|
AlertingUsage,
|
||||||
| 'count_total'
|
| 'count_total'
|
||||||
|
@ -48,6 +56,14 @@ type GetTotalCountsResults = Pick<
|
||||||
| 'connectors_per_alert'
|
| 'connectors_per_alert'
|
||||||
> & { errorMessage?: string; hasErrors: boolean };
|
> & { errorMessage?: string; hasErrors: boolean };
|
||||||
|
|
||||||
|
/** AlertingUsage counters that getMWTelemetry is responsible for. */
type MWTelemetryCountKey =
  | 'count_mw_total'
  | 'count_mw_with_repeat_toggle_on'
  | 'count_mw_with_filter_alert_toggle_on';

/**
 * Result of getMWTelemetry: the maintenance window counters plus an error flag
 * and, when the collection failed, the error message.
 */
type GetMWTelemetryResults = Pick<AlertingUsage, MWTelemetryCountKey> & {
  hasErrors: boolean;
  errorMessage?: string;
};
|
||||||
|
|
||||||
interface GetTotalCountInUseResults {
|
interface GetTotalCountInUseResults {
|
||||||
countTotal: number;
|
countTotal: number;
|
||||||
countByType: Record<string, number>;
|
countByType: Record<string, number>;
|
||||||
|
@ -56,6 +72,8 @@ interface GetTotalCountInUseResults {
|
||||||
hasErrors: boolean;
|
hasErrors: boolean;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Default threshold at which getMWTelemetry stops counting maintenance windows. */
const TELEMETRY_MW_COUNT_LIMIT = 10_000;
|
||||||
|
|
||||||
export async function getTotalCountAggregations({
|
export async function getTotalCountAggregations({
|
||||||
esClient,
|
esClient,
|
||||||
alertIndex,
|
alertIndex,
|
||||||
|
@ -490,3 +508,60 @@ export async function getTotalCountInUse({
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Collects maintenance window (MW) counters for the alerting telemetry task.
 *
 * Pages through maintenance window saved objects in all namespaces with a
 * point-in-time finder (100 per page, fetching only `rRule` and `scopedQuery`) and counts:
 *  - `count_mw_total`: maintenance windows visited
 *  - `count_mw_with_repeat_toggle_on`: those whose `rRule` has an own `interval` property
 *  - `count_mw_with_filter_alert_toggle_on`: those with a truthy `scopedQuery`
 *
 * Errors raised while collecting are caught: a warning is logged and zeroed counters
 * are returned together with `hasErrors: true` and `errorMessage`.
 *
 * @param opts.savedObjectsClient repository used to create the point-in-time finder
 * @param opts.logger logger that receives the warning on failure
 * @param opts.maxDocuments counting threshold, defaults to TELEMETRY_MW_COUNT_LIMIT
 * @returns the three counters, `hasErrors` and, on failure, `errorMessage`
 */
export async function getMWTelemetry({
  savedObjectsClient,
  logger,
  maxDocuments = TELEMETRY_MW_COUNT_LIMIT,
}: MWOpts): Promise<GetMWTelemetryResults> {
  try {
    const mwFinder = savedObjectsClient.createPointInTimeFinder<MaintenanceWindowAttributes>({
      type: MAINTENANCE_WINDOW_SAVED_OBJECT_TYPE,
      namespaces: ['*'],
      perPage: 100,
      fields: ['rRule', 'scopedQuery'],
    });

    let countMWTotal = 0;
    let countMWWithRepeatToggleON = 0;
    let countMWWithFilterAlertToggleON = 0;

    try {
      mwLoop: for await (const response of mwFinder.find()) {
        for (const mwSavedObject of response.saved_objects) {
          // NOTE: the check runs before the increment and uses `>`, so up to
          // maxDocuments + 1 documents are counted; existing tests rely on this.
          if (countMWTotal > maxDocuments) break mwLoop;
          countMWTotal = countMWTotal + 1;

          const { rRule, scopedQuery } = mwSavedObject.attributes;
          // scopedQuery property will be null if "Filter alerts" toggle is off
          if (scopedQuery) {
            countMWWithFilterAlertToggleON = countMWWithFilterAlertToggleON + 1;
          }
          // interval property will not be in place if "Repeat" toggle is off.
          // Guard against a missing rRule: Object.hasOwn throws on null/undefined, which
          // would otherwise discard the counters of every other maintenance window.
          if (rRule != null && Object.hasOwn(rRule, 'interval')) {
            countMWWithRepeatToggleON = countMWWithRepeatToggleON + 1;
          }
        }
      }
    } finally {
      // Release the finder on every exit path: normal completion, early break at the
      // threshold, or an error thrown while iterating.
      await mwFinder.close();
    }

    return {
      hasErrors: false,
      count_mw_total: countMWTotal,
      count_mw_with_repeat_toggle_on: countMWWithRepeatToggleON,
      count_mw_with_filter_alert_toggle_on: countMWWithFilterAlertToggleON,
    };
  } catch (err) {
    // String(err) also copes with null/undefined being thrown, unlike err.toString().
    const errorMessage = err?.message ? err.message : String(err);
    // The log text is kept unchanged because existing tests assert on it; the stack
    // trace passed in the meta object carries the error details.
    logger.warn(
      `Error executing alerting telemetry task: getTotalMWCount - ${JSON.stringify(err)}`,
      {
        tags: ['alerting', 'telemetry-failed'],
        error: { stack_trace: err?.stack },
      }
    );
    return {
      hasErrors: true,
      errorMessage,
      count_mw_total: 0,
      count_mw_with_repeat_toggle_on: 0,
      count_mw_with_filter_alert_toggle_on: 0,
    };
  }
}
|
||||||
|
|
|
@ -12,15 +12,19 @@ import {
|
||||||
TaskManagerStartContract,
|
TaskManagerStartContract,
|
||||||
IntervalSchedule,
|
IntervalSchedule,
|
||||||
} from '@kbn/task-manager-plugin/server';
|
} from '@kbn/task-manager-plugin/server';
|
||||||
|
|
||||||
import { getFailedAndUnrecognizedTasksPerDay } from './lib/get_telemetry_from_task_manager';
|
import { getFailedAndUnrecognizedTasksPerDay } from './lib/get_telemetry_from_task_manager';
|
||||||
import { getTotalCountAggregations, getTotalCountInUse } from './lib/get_telemetry_from_kibana';
|
import {
|
||||||
|
getTotalCountAggregations,
|
||||||
|
getTotalCountInUse,
|
||||||
|
getMWTelemetry,
|
||||||
|
} from './lib/get_telemetry_from_kibana';
|
||||||
import {
|
import {
|
||||||
getExecutionsPerDayCount,
|
getExecutionsPerDayCount,
|
||||||
getExecutionTimeoutsPerDayCount,
|
getExecutionTimeoutsPerDayCount,
|
||||||
} from './lib/get_telemetry_from_event_log';
|
} from './lib/get_telemetry_from_event_log';
|
||||||
import { stateSchemaByVersion, emptyState, type LatestTaskStateSchema } from './task_state';
|
import { stateSchemaByVersion, emptyState, type LatestTaskStateSchema } from './task_state';
|
||||||
import { RULE_SAVED_OBJECT_TYPE } from '../saved_objects';
|
import { RULE_SAVED_OBJECT_TYPE } from '../saved_objects';
|
||||||
|
import { MAINTENANCE_WINDOW_SAVED_OBJECT_TYPE } from '../../common';
|
||||||
|
|
||||||
export const TELEMETRY_TASK_TYPE = 'alerting_telemetry';
|
export const TELEMETRY_TASK_TYPE = 'alerting_telemetry';
|
||||||
|
|
||||||
|
@ -36,12 +40,6 @@ export function initializeAlertingTelemetry(
|
||||||
registerAlertingTelemetryTask(logger, core, taskManager, eventLogIndex);
|
registerAlertingTelemetryTask(logger, core, taskManager, eventLogIndex);
|
||||||
}
|
}
|
||||||
|
|
||||||
export function scheduleAlertingTelemetry(logger: Logger, taskManager?: TaskManagerStartContract) {
|
|
||||||
if (taskManager) {
|
|
||||||
scheduleTasks(logger, taskManager).catch(() => {}); // it shouldn't reject, but just in case
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function registerAlertingTelemetryTask(
|
function registerAlertingTelemetryTask(
|
||||||
logger: Logger,
|
logger: Logger,
|
||||||
core: CoreSetup,
|
core: CoreSetup,
|
||||||
|
@ -58,6 +56,12 @@ function registerAlertingTelemetryTask(
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Kicks off scheduling of the alerting telemetry task when a task manager start
 * contract is provided; does nothing when it is not.
 */
export function scheduleAlertingTelemetry(logger: Logger, taskManager?: TaskManagerStartContract) {
  if (!taskManager) {
    return;
  }

  // it shouldn't reject, but just in case
  scheduleTasks(logger, taskManager).catch(() => {});
}
|
||||||
|
|
||||||
async function scheduleTasks(logger: Logger, taskManager: TaskManagerStartContract) {
|
async function scheduleTasks(logger: Logger, taskManager: TaskManagerStartContract) {
|
||||||
try {
|
try {
|
||||||
await taskManager.ensureScheduled({
|
await taskManager.ensureScheduled({
|
||||||
|
@ -93,16 +97,26 @@ export function telemetryTaskRunner(
|
||||||
.getStartServices()
|
.getStartServices()
|
||||||
.then(([coreStart]) => coreStart.savedObjects.getIndexForType(RULE_SAVED_OBJECT_TYPE));
|
.then(([coreStart]) => coreStart.savedObjects.getIndexForType(RULE_SAVED_OBJECT_TYPE));
|
||||||
|
|
||||||
|
const getSavedObjectClient = () =>
|
||||||
|
core
|
||||||
|
.getStartServices()
|
||||||
|
.then(([coreStart]) =>
|
||||||
|
coreStart.savedObjects.createInternalRepository([MAINTENANCE_WINDOW_SAVED_OBJECT_TYPE])
|
||||||
|
);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
async run() {
|
async run() {
|
||||||
const esClient = await getEsClient();
|
const esClient = await getEsClient();
|
||||||
const alertIndex = await getAlertIndex();
|
const alertIndex = await getAlertIndex();
|
||||||
|
const savedObjectsClient = await getSavedObjectClient();
|
||||||
|
|
||||||
return Promise.all([
|
return Promise.all([
|
||||||
getTotalCountAggregations({ esClient, alertIndex, logger }),
|
getTotalCountAggregations({ esClient, alertIndex, logger }),
|
||||||
getTotalCountInUse({ esClient, alertIndex, logger }),
|
getTotalCountInUse({ esClient, alertIndex, logger }),
|
||||||
getExecutionsPerDayCount({ esClient, eventLogIndex, logger }),
|
getExecutionsPerDayCount({ esClient, eventLogIndex, logger }),
|
||||||
getExecutionTimeoutsPerDayCount({ esClient, eventLogIndex, logger }),
|
getExecutionTimeoutsPerDayCount({ esClient, eventLogIndex, logger }),
|
||||||
getFailedAndUnrecognizedTasksPerDay({ esClient, taskManagerIndex, logger }),
|
getFailedAndUnrecognizedTasksPerDay({ esClient, taskManagerIndex, logger }),
|
||||||
|
getMWTelemetry({ logger, savedObjectsClient }),
|
||||||
])
|
])
|
||||||
.then(
|
.then(
|
||||||
([
|
([
|
||||||
|
@ -111,13 +125,15 @@ export function telemetryTaskRunner(
|
||||||
dailyExecutionCounts,
|
dailyExecutionCounts,
|
||||||
dailyExecutionTimeoutCounts,
|
dailyExecutionTimeoutCounts,
|
||||||
dailyFailedAndUnrecognizedTasks,
|
dailyFailedAndUnrecognizedTasks,
|
||||||
|
MWTelemetry,
|
||||||
]) => {
|
]) => {
|
||||||
const hasErrors =
|
const hasErrors =
|
||||||
totalCountAggregations.hasErrors ||
|
totalCountAggregations.hasErrors ||
|
||||||
totalInUse.hasErrors ||
|
totalInUse.hasErrors ||
|
||||||
dailyExecutionCounts.hasErrors ||
|
dailyExecutionCounts.hasErrors ||
|
||||||
dailyExecutionTimeoutCounts.hasErrors ||
|
dailyExecutionTimeoutCounts.hasErrors ||
|
||||||
dailyFailedAndUnrecognizedTasks.hasErrors;
|
dailyFailedAndUnrecognizedTasks.hasErrors ||
|
||||||
|
MWTelemetry.hasErrors;
|
||||||
|
|
||||||
const errorMessages = [
|
const errorMessages = [
|
||||||
totalCountAggregations.errorMessage,
|
totalCountAggregations.errorMessage,
|
||||||
|
@ -125,6 +141,7 @@ export function telemetryTaskRunner(
|
||||||
dailyExecutionCounts.errorMessage,
|
dailyExecutionCounts.errorMessage,
|
||||||
dailyExecutionTimeoutCounts.errorMessage,
|
dailyExecutionTimeoutCounts.errorMessage,
|
||||||
dailyFailedAndUnrecognizedTasks.errorMessage,
|
dailyFailedAndUnrecognizedTasks.errorMessage,
|
||||||
|
MWTelemetry.errorMessage,
|
||||||
].filter((message) => message !== undefined);
|
].filter((message) => message !== undefined);
|
||||||
|
|
||||||
const updatedState: LatestTaskStateSchema = {
|
const updatedState: LatestTaskStateSchema = {
|
||||||
|
@ -147,6 +164,10 @@ export function telemetryTaskRunner(
|
||||||
count_rules_by_notify_when: totalCountAggregations.count_rules_by_notify_when,
|
count_rules_by_notify_when: totalCountAggregations.count_rules_by_notify_when,
|
||||||
count_rules_snoozed: totalCountAggregations.count_rules_snoozed,
|
count_rules_snoozed: totalCountAggregations.count_rules_snoozed,
|
||||||
count_rules_muted: totalCountAggregations.count_rules_muted,
|
count_rules_muted: totalCountAggregations.count_rules_muted,
|
||||||
|
count_mw_total: MWTelemetry.count_mw_total,
|
||||||
|
count_mw_with_repeat_toggle_on: MWTelemetry.count_mw_with_repeat_toggle_on,
|
||||||
|
count_mw_with_filter_alert_toggle_on:
|
||||||
|
MWTelemetry.count_mw_with_filter_alert_toggle_on,
|
||||||
count_rules_with_muted_alerts: totalCountAggregations.count_rules_with_muted_alerts,
|
count_rules_with_muted_alerts: totalCountAggregations.count_rules_with_muted_alerts,
|
||||||
count_connector_types_by_consumers:
|
count_connector_types_by_consumers:
|
||||||
totalCountAggregations.count_connector_types_by_consumers,
|
totalCountAggregations.count_connector_types_by_consumers,
|
||||||
|
|
|
@ -146,6 +146,9 @@ export const stateSchemaByVersion = {
|
||||||
}),
|
}),
|
||||||
count_rules_snoozed: schema.number(),
|
count_rules_snoozed: schema.number(),
|
||||||
count_rules_muted: schema.number(),
|
count_rules_muted: schema.number(),
|
||||||
|
count_mw_total: schema.number(),
|
||||||
|
count_mw_with_repeat_toggle_on: schema.number(),
|
||||||
|
count_mw_with_filter_alert_toggle_on: schema.number(),
|
||||||
count_rules_with_muted_alerts: schema.number(),
|
count_rules_with_muted_alerts: schema.number(),
|
||||||
count_connector_types_by_consumers: schema.recordOf(
|
count_connector_types_by_consumers: schema.recordOf(
|
||||||
schema.string(),
|
schema.string(),
|
||||||
|
@ -248,6 +251,9 @@ export const emptyState: LatestTaskStateSchema = {
|
||||||
},
|
},
|
||||||
count_rules_snoozed: 0,
|
count_rules_snoozed: 0,
|
||||||
count_rules_muted: 0,
|
count_rules_muted: 0,
|
||||||
|
count_mw_total: 0,
|
||||||
|
count_mw_with_repeat_toggle_on: 0,
|
||||||
|
count_mw_with_filter_alert_toggle_on: 0,
|
||||||
count_rules_with_muted_alerts: 0,
|
count_rules_with_muted_alerts: 0,
|
||||||
count_connector_types_by_consumers: {},
|
count_connector_types_by_consumers: {},
|
||||||
count_rules_namespaces: 0,
|
count_rules_namespaces: 0,
|
||||||
|
|
|
@ -41,6 +41,9 @@ export interface AlertingUsage {
|
||||||
count_connector_types_by_consumers: Record<string, Record<string, number>>;
|
count_connector_types_by_consumers: Record<string, Record<string, number>>;
|
||||||
count_rules_snoozed: number;
|
count_rules_snoozed: number;
|
||||||
count_rules_muted: number;
|
count_rules_muted: number;
|
||||||
|
count_mw_total: number;
|
||||||
|
count_mw_with_repeat_toggle_on: number;
|
||||||
|
count_mw_with_filter_alert_toggle_on: number;
|
||||||
count_rules_with_muted_alerts: number;
|
count_rules_with_muted_alerts: number;
|
||||||
count_rules_by_execution_status_per_day: Record<string, number>;
|
count_rules_by_execution_status_per_day: Record<string, number>;
|
||||||
percentile_num_generated_actions_per_day: {
|
percentile_num_generated_actions_per_day: {
|
||||||
|
|
|
@ -1724,6 +1724,15 @@
|
||||||
"count_rules_muted": {
|
"count_rules_muted": {
|
||||||
"type": "long"
|
"type": "long"
|
||||||
},
|
},
|
||||||
|
"count_mw_total": {
|
||||||
|
"type": "long"
|
||||||
|
},
|
||||||
|
"count_mw_with_repeat_toggle_on": {
|
||||||
|
"type": "long"
|
||||||
|
},
|
||||||
|
"count_mw_with_filter_alert_toggle_on": {
|
||||||
|
"type": "long"
|
||||||
|
},
|
||||||
"count_rules_with_muted_alerts": {
|
"count_rules_with_muted_alerts": {
|
||||||
"type": "long"
|
"type": "long"
|
||||||
},
|
},
|
||||||
|
|
|
@ -90,6 +90,44 @@ export default function createAlertingAndActionsTelemetryTests({ getService }: F
|
||||||
return ruleResponse.body.id;
|
return ruleResponse.body.id;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Creates a maintenance window in the given space through the internal alerting API,
 * registers it with the object remover and returns its id.
 *
 * `interval` is only added to the recurrence rule when a truthy value is passed;
 * `scopedQuery` is sent as `scoped_query` and defaults to null.
 */
async function createMaintenanceWindow({
  spaceId,
  interval,
  scopedQuery = null,
}: {
  spaceId: string;
  interval?: number;
  scopedQuery?: {
    filters: string[];
    kql: string;
    dsl: string;
  } | null;
}) {
  const recurrence = {
    dtstart: new Date().toISOString(),
    tzid: 'UTC',
    freq: 0,
    count: 1,
    ...(interval ? { interval } : {}),
  };
  const payload = {
    title: 'test-maintenance-window',
    duration: 60 * 60 * 1000, // 1 hr
    r_rule: recurrence,
    category_ids: ['management'],
    scoped_query: scopedQuery,
  };
  const url = `${getUrlPrefix(spaceId)}/internal/alerting/rules/maintenance_window`;

  const response = await supertestWithoutAuth
    .post(url)
    .set('kbn-xsrf', 'foo')
    .auth(Superuser.username, Superuser.password)
    .send(payload);

  expect(response.status).to.equal(200);

  const { id } = response.body;
  objectRemover.add(spaceId, id, 'rules/maintenance_window', 'alerting', true);

  return id;
}
|
||||||
|
|
||||||
async function setup() {
|
async function setup() {
|
||||||
// Create rules and connectors in multiple spaces
|
// Create rules and connectors in multiple spaces
|
||||||
for (const space of Spaces) {
|
for (const space of Spaces) {
|
||||||
|
@ -216,6 +254,18 @@ export default function createAlertingAndActionsTelemetryTests({ getService }: F
|
||||||
actions: [],
|
actions: [],
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
// MW with both toggles off
|
||||||
|
await createMaintenanceWindow({ spaceId: space.id });
|
||||||
|
// MW with 'Repeat' toggle on and 'Filter alerts' toggle on
|
||||||
|
await createMaintenanceWindow({
|
||||||
|
spaceId: space.id,
|
||||||
|
interval: 1,
|
||||||
|
scopedQuery: {
|
||||||
|
filters: [],
|
||||||
|
kql: 'kibana.alert.job_errors_results.job_id : * ',
|
||||||
|
dsl: '{"bool":{"must":[],"filter":[{"bool":{"should":[{"exists":{"field":"kibana.alert.job_errors_results.job_id"}}],"minimum_should_match":1}}],"should":[],"must_not":[]}}',
|
||||||
|
},
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -500,6 +550,11 @@ export default function createAlertingAndActionsTelemetryTests({ getService }: F
|
||||||
|
|
||||||
expect(telemetry.count_rules_by_execution_status_per_day.failure > 0).to.be(true);
|
expect(telemetry.count_rules_by_execution_status_per_day.failure > 0).to.be(true);
|
||||||
expect(telemetry.count_rules_by_execution_status_per_day.success > 0).to.be(true);
|
expect(telemetry.count_rules_by_execution_status_per_day.success > 0).to.be(true);
|
||||||
|
|
||||||
|
// maintenance window telemetry
|
||||||
|
expect(telemetry.count_mw_total).to.equal(6);
|
||||||
|
expect(telemetry.count_mw_with_filter_alert_toggle_on).to.equal(3);
|
||||||
|
expect(telemetry.count_mw_with_repeat_toggle_on).to.equal(3);
|
||||||
}
|
}
|
||||||
|
|
||||||
it('should retrieve telemetry data in the expected format', async () => {
|
it('should retrieve telemetry data in the expected format', async () => {
|
||||||
|
@ -527,7 +582,7 @@ export default function createAlertingAndActionsTelemetryTests({ getService }: F
|
||||||
let actionsTelemetry: any;
|
let actionsTelemetry: any;
|
||||||
await retry.try(async () => {
|
await retry.try(async () => {
|
||||||
const telemetryTask = await es.get<TaskManagerDoc>({
|
const telemetryTask = await es.get<TaskManagerDoc>({
|
||||||
id: `task:Actions-actions_telemetry`,
|
id: 'task:Actions-actions_telemetry',
|
||||||
index: '.kibana_task_manager',
|
index: '.kibana_task_manager',
|
||||||
});
|
});
|
||||||
expect(telemetryTask!._source!.task?.status).to.be('idle');
|
expect(telemetryTask!._source!.task?.status).to.be('idle');
|
||||||
|
@ -550,7 +605,7 @@ export default function createAlertingAndActionsTelemetryTests({ getService }: F
|
||||||
let alertingTelemetry: any;
|
let alertingTelemetry: any;
|
||||||
await retry.try(async () => {
|
await retry.try(async () => {
|
||||||
const telemetryTask = await es.get<TaskManagerDoc>({
|
const telemetryTask = await es.get<TaskManagerDoc>({
|
||||||
id: `task:Alerting-alerting_telemetry`,
|
id: 'task:Alerting-alerting_telemetry',
|
||||||
index: '.kibana_task_manager',
|
index: '.kibana_task_manager',
|
||||||
});
|
});
|
||||||
expect(telemetryTask!._source!.task?.status).to.be('idle');
|
expect(telemetryTask!._source!.task?.status).to.be('idle');
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue