From fff8ae9bcce73d6ae7a76ec4a9fd75babc08c5bf Mon Sep 17 00:00:00 2001 From: Alexi Doak <109488926+doakalexi@users.noreply.github.com> Date: Mon, 16 Jun 2025 21:13:36 -0700 Subject: [PATCH] [Response Ops][Alerting] Cannot read properties of undefined (reading 'getActiveCount') (#221799) Resolves https://github.com/elastic/kibana/issues/208740 ## Summary This error message comes from the code where we drop the oldest recovered alerts from being tracked in the task state when there are more than 1000 (or alert limit) recovered alerts. The flapping refactor fixed this specific error, but I noticed that the alert documents weren't being updated before the alerts were dropped. This PR just moves this logic to the function that gets all the alerts to serialize in the task state, which happens after the alert documents are updated. ### Checklist Check the PR satisfies following conditions. - [ ] [Unit or functional tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html) were updated or added to match the most common scenarios ### To verify 1. Set `xpack.alerting.rules.run.alerts.max: 3` in kibana.yml 2. Start kibana and create a rule that will generate 3 alerts 3. Stop kibana and set the max alert limit to 2 in kibana.yml 4. Start kibana and update the conditions to recover the alerts. Because there are 3 alerts recovering and we can only track 2, one alert will be dropped from the task state. 5. Verify that all the alerts are marked as recovered. Let the rule run to verify that one of the alert's alert document is no longer updated. --- .buildkite/ftr_platform_stateful_configs.yml | 1 + .../legacy_alerts_client.test.ts | 2 - .../alerts_client/legacy_alerts_client.ts | 4 +- .../delay_recovered_flapping_alerts.test.ts | 103 +------ .../delay_recovered_flapping_alerts.ts | 50 +--- .../lib/flapping/determine_flapping_alerts.ts | 7 - .../optimize_task_state_for_flapping.test.ts | 95 +++++++ .../optimize_task_state_for_flapping.ts | 74 +++++ .../server/lib/to_raw_alert_instances.test.ts | 20 +- .../server/lib/to_raw_alert_instances.ts | 26 +- .../alerting_api_integration/common/config.ts | 4 +- .../alerts_as_data/alerts_as_data_flapping.ts | 262 ++++++++++++++++++ .../circuit_breaker/index.ts | 8 +- .../group4/builtin_alert_types/index.ts | 1 - .../config_with_schedule_circuit_breaker.ts | 25 ++ 15 files changed, 498 insertions(+), 184 deletions(-) create mode 100644 x-pack/platform/plugins/shared/alerting/server/lib/flapping/optimize_task_state_for_flapping.test.ts create mode 100644 x-pack/platform/plugins/shared/alerting/server/lib/flapping/optimize_task_state_for_flapping.ts create mode 100644 x-pack/platform/test/alerting_api_integration/spaces_only/tests/alerting/group4/config_with_schedule_circuit_breaker.ts diff --git a/.buildkite/ftr_platform_stateful_configs.yml b/.buildkite/ftr_platform_stateful_configs.yml index 00294960a374..76e5635b116b 100644 --- a/.buildkite/ftr_platform_stateful_configs.yml +++ b/.buildkite/ftr_platform_stateful_configs.yml @@ -154,6 +154,7 @@ enabled: - x-pack/platform/test/alerting_api_integration/spaces_only/tests/alerting/group2/config.ts - x-pack/platform/test/alerting_api_integration/spaces_only/tests/alerting/group3/config.ts - x-pack/platform/test/alerting_api_integration/spaces_only/tests/alerting/group4/config.ts + - x-pack/platform/test/alerting_api_integration/spaces_only/tests/alerting/group4/config_with_schedule_circuit_breaker.ts - x-pack/platform/test/alerting_api_integration/spaces_only/tests/actions/config.ts - x-pack/platform/test/alerting_api_integration/spaces_only/tests/action_task_params/config.ts - x-pack/test/api_integration_basic/config.ts diff --git a/x-pack/platform/plugins/shared/alerting/server/alerts_client/legacy_alerts_client.test.ts b/x-pack/platform/plugins/shared/alerting/server/alerts_client/legacy_alerts_client.test.ts index 952c9d717839..e742bf262f1c 100644 --- a/x-pack/platform/plugins/shared/alerting/server/alerts_client/legacy_alerts_client.test.ts +++ b/x-pack/platform/plugins/shared/alerting/server/alerts_client/legacy_alerts_client.test.ts @@ -460,7 +460,6 @@ describe('Legacy Alerts Client', () => { alertsClient.determineFlappingAlerts(); expect(determineFlappingAlerts).toHaveBeenCalledWith({ - logger, newAlerts: {}, activeAlerts: {}, recoveredAlerts: {}, @@ -471,7 +470,6 @@ describe('Legacy Alerts Client', () => { }, previouslyRecoveredAlerts: {}, actionGroupId: 'default', - maxAlerts: 1000, }); expect(alertsClient.getProcessedAlerts('active')).toEqual({ diff --git a/x-pack/platform/plugins/shared/alerting/server/alerts_client/legacy_alerts_client.ts b/x-pack/platform/plugins/shared/alerting/server/alerts_client/legacy_alerts_client.ts index 3dd6f8b6c4de..04547fa2e571 100644 --- a/x-pack/platform/plugins/shared/alerting/server/alerts_client/legacy_alerts_client.ts +++ b/x-pack/platform/plugins/shared/alerting/server/alerts_client/legacy_alerts_client.ts @@ -216,6 +216,8 @@ export class LegacyAlertsClient< public getRawAlertInstancesForState(shouldOptimizeTaskState?: boolean) { return toRawAlertInstances( + this.options.logger, + this.maxAlerts, this.processedAlerts.trackedActiveAlerts, this.processedAlerts.trackedRecoveredAlerts, shouldOptimizeTaskState @@ -225,14 +227,12 @@ export class LegacyAlertsClient< public determineFlappingAlerts() { if (this.flappingSettings.enabled) { const alerts = determineFlappingAlerts({ - logger: this.options.logger, newAlerts: this.processedAlerts.new, activeAlerts: this.processedAlerts.active, recoveredAlerts: this.processedAlerts.recovered, flappingSettings: this.flappingSettings, previouslyRecoveredAlerts: this.trackedAlerts.recovered, actionGroupId: this.options.ruleType.defaultActionGroupId, - maxAlerts: this.maxAlerts, }); this.processedAlerts.new = alerts.newAlerts; diff --git a/x-pack/platform/plugins/shared/alerting/server/lib/flapping/delay_recovered_flapping_alerts.test.ts b/x-pack/platform/plugins/shared/alerting/server/lib/flapping/delay_recovered_flapping_alerts.test.ts index b4790d46c624..f414ef456b81 100644 --- a/x-pack/platform/plugins/shared/alerting/server/lib/flapping/delay_recovered_flapping_alerts.test.ts +++ b/x-pack/platform/plugins/shared/alerting/server/lib/flapping/delay_recovered_flapping_alerts.test.ts @@ -5,18 +5,12 @@ * 2.0. */ -import { loggingSystemMock } from '@kbn/core-logging-server-mocks'; import { DEFAULT_FLAPPING_SETTINGS } from '../../../common/rules_settings'; import { Alert } from '../../alert'; import { alertsWithAnyUUID } from '../../test_utils'; -import { - delayRecoveredFlappingAlerts, - getEarlyRecoveredAlertIds, -} from './delay_recovered_flapping_alerts'; +import { delayRecoveredFlappingAlerts } from './delay_recovered_flapping_alerts'; describe('delayRecoveredFlappingAlerts', () => { - const logger = loggingSystemMock.createLogger(); - test('should set pendingRecoveredCount to zero for all active alerts', () => { const alert1 = new Alert('1', { meta: { flapping: true, pendingRecoveredCount: 3, uuid: 'uuid-1' }, @@ -24,10 +18,8 @@ describe('delayRecoveredFlappingAlerts', () => { const alert2 = new Alert('2', { meta: { flapping: false, uuid: 'uuid-2' } }); const { newAlerts, activeAlerts, trackedActiveAlerts } = delayRecoveredFlappingAlerts( - logger, DEFAULT_FLAPPING_SETTINGS, 'default', - 1000, { // new alerts '1': alert1, @@ -121,10 +113,8 @@ describe('delayRecoveredFlappingAlerts', () => { recoveredAlerts, trackedRecoveredAlerts, } = delayRecoveredFlappingAlerts( - logger, DEFAULT_FLAPPING_SETTINGS, 'default', - 1000, {}, // new alerts {}, // active alerts {}, // tracked active alerts @@ -238,95 +228,4 @@ describe('delayRecoveredFlappingAlerts', () => { } `); }); - - describe('getEarlyRecoveredAlertIds', () => { - const alert1 = new Alert('1', { meta: { flappingHistory: [true, true, true, true] } }); - const alert2 = new Alert('2', { meta: { flappingHistory: new Array(20).fill(false) } }); - const alert3 = new Alert('3', { meta: { flappingHistory: [true, true] } }); - - test('should remove longest recovered alerts', () => { - const { recoveredAlerts, trackedRecoveredAlerts } = delayRecoveredFlappingAlerts( - logger, - DEFAULT_FLAPPING_SETTINGS, - 'default', - 2, - {}, // new alerts - {}, // active alerts - {}, // tracked active alerts - { - // recovered alerts - '1': alert1, - '2': alert2, - '3': alert3, - }, - { - // tracked recovered alerts - '1': alert1, - '2': alert2, - '3': alert3, - } - ); - expect(Object.keys(recoveredAlerts).length).toBe(3); - expect(recoveredAlerts['2'].getFlapping()).toBe(false); - expect(Object.keys(trackedRecoveredAlerts).length).toBe(2); - }); - - test('should not remove alerts if the num of recovered alerts is not at the limit', () => { - const { recoveredAlerts, trackedRecoveredAlerts } = delayRecoveredFlappingAlerts( - logger, - DEFAULT_FLAPPING_SETTINGS, - 'default', - 3, - {}, // new alerts - {}, // active alerts - {}, // tracked active alerts - { - // recovered alerts - '1': alert1, - '2': alert2, - '3': alert3, - }, - { - // tracked recovered alerts - '1': alert1, - '2': alert2, - '3': alert3, - } - ); - expect(Object.keys(recoveredAlerts).length).toBe(3); - expect(recoveredAlerts['2'].getFlapping()).toBe(false); - expect(Object.keys(trackedRecoveredAlerts).length).toBe(3); - }); - - test('getEarlyRecoveredAlertIds should return longest recovered alerts', () => { - const alertIds = getEarlyRecoveredAlertIds( - logger, - { - // tracked recovered alerts - '1': alert1, - '2': alert2, - '3': alert3, - }, - 2 - ); - expect(alertIds).toEqual(['2']); - - expect(logger.warn).toBeCalledWith( - 'Recovered alerts have exceeded the max alert limit of 2 : dropping 1 alert.' - ); - }); - - test('getEarlyRecoveredAlertIds should not return alerts if the num of recovered alerts is not at the limit', () => { - const trimmedAlerts = getEarlyRecoveredAlertIds( - logger, - { - // tracked recovered alerts - '1': alert1, - '2': alert2, - }, - 2 - ); - expect(trimmedAlerts).toEqual([]); - }); - }); }); diff --git a/x-pack/platform/plugins/shared/alerting/server/lib/flapping/delay_recovered_flapping_alerts.ts b/x-pack/platform/plugins/shared/alerting/server/lib/flapping/delay_recovered_flapping_alerts.ts index 08bd5cf730a4..854ac5d77d45 100644 --- a/x-pack/platform/plugins/shared/alerting/server/lib/flapping/delay_recovered_flapping_alerts.ts +++ b/x-pack/platform/plugins/shared/alerting/server/lib/flapping/delay_recovered_flapping_alerts.ts @@ -5,8 +5,7 @@ * 2.0. */ -import { keys, map } from 'lodash'; -import type { Logger } from '@kbn/logging'; +import { keys } from 'lodash'; import type { RulesSettingsFlappingProperties } from '../../../common/rules_settings'; import { Alert } from '../../alert'; import type { AlertInstanceState, AlertInstanceContext } from '../../types'; @@ -17,10 +16,8 @@ export function delayRecoveredFlappingAlerts< ActionGroupIds extends string, RecoveryActionGroupId extends string >( - logger: Logger, flappingSettings: RulesSettingsFlappingProperties, actionGroupId: string, - maxAlerts: number, newAlerts: Record> = {}, activeAlerts: Record> = {}, trackedActiveAlerts: Record> = {}, @@ -66,19 +63,6 @@ export function delayRecoveredFlappingAlerts< } } - const earlyRecoveredAlertIds = getEarlyRecoveredAlertIds( - logger, - trackedRecoveredAlerts, - maxAlerts - ); - for (const id of earlyRecoveredAlertIds) { - const alert = trackedRecoveredAlerts[id]; - alert.setFlapping(false); - recoveredAlerts[id] = alert; - - delete trackedRecoveredAlerts[id]; - } - return { newAlerts, activeAlerts, @@ -87,35 +71,3 @@ export function delayRecoveredFlappingAlerts< trackedRecoveredAlerts, }; } - -export function getEarlyRecoveredAlertIds< - State extends AlertInstanceState, - Context extends AlertInstanceContext, - RecoveryActionGroupId extends string ->( - logger: Logger, - trackedRecoveredAlerts: Record>, - maxAlerts: number -) { - const alerts = map(trackedRecoveredAlerts, (alert, id) => { - return { - id, - flappingHistory: alert.getFlappingHistory() || [], - }; - }); - - let earlyRecoveredAlertIds: string[] = []; - if (alerts.length > maxAlerts) { - alerts.sort((a, b) => { - return a.flappingHistory.length - b.flappingHistory.length; - }); - - earlyRecoveredAlertIds = alerts.slice(maxAlerts).map((alert) => alert.id); - logger.warn( - `Recovered alerts have exceeded the max alert limit of ${maxAlerts} : dropping ${ - earlyRecoveredAlertIds.length - } ${earlyRecoveredAlertIds.length > 1 ? 'alerts' : 'alert'}.` - ); - } - return earlyRecoveredAlertIds; -} diff --git a/x-pack/platform/plugins/shared/alerting/server/lib/flapping/determine_flapping_alerts.ts b/x-pack/platform/plugins/shared/alerting/server/lib/flapping/determine_flapping_alerts.ts index 806ad97afd03..251052e850b7 100644 --- a/x-pack/platform/plugins/shared/alerting/server/lib/flapping/determine_flapping_alerts.ts +++ b/x-pack/platform/plugins/shared/alerting/server/lib/flapping/determine_flapping_alerts.ts @@ -5,7 +5,6 @@ * 2.0. */ -import type { Logger } from '@kbn/logging'; import type { Alert } from '../../alert'; import type { AlertInstanceState, AlertInstanceContext } from '../../types'; import type { RulesSettingsFlappingProperties } from '../../../common/rules_settings'; @@ -19,14 +18,12 @@ interface DetermineFlappingAlertsOpts< ActionGroupIds extends string, RecoveryActionGroupId extends string > { - logger: Logger; newAlerts: Record>; activeAlerts: Record>; recoveredAlerts: Record>; flappingSettings: RulesSettingsFlappingProperties; previouslyRecoveredAlerts: Record>; actionGroupId: string; - maxAlerts: number; } export function determineFlappingAlerts< @@ -35,14 +32,12 @@ export function determineFlappingAlerts< ActionGroupIds extends string, RecoveryActionGroupId extends string >({ - logger, newAlerts, activeAlerts, recoveredAlerts, flappingSettings, previouslyRecoveredAlerts, actionGroupId, - maxAlerts, }: DetermineFlappingAlertsOpts) { setFlapping( flappingSettings, @@ -58,10 +53,8 @@ export function determineFlappingAlerts< >(flappingSettings, newAlerts, activeAlerts, recoveredAlerts, previouslyRecoveredAlerts); alerts = delayRecoveredFlappingAlerts( - logger, flappingSettings, actionGroupId, - maxAlerts, alerts.newAlerts, alerts.activeAlerts, alerts.trackedActiveAlerts, diff --git a/x-pack/platform/plugins/shared/alerting/server/lib/flapping/optimize_task_state_for_flapping.test.ts b/x-pack/platform/plugins/shared/alerting/server/lib/flapping/optimize_task_state_for_flapping.test.ts new file mode 100644 index 000000000000..0e7dc3076ab7 --- /dev/null +++ b/x-pack/platform/plugins/shared/alerting/server/lib/flapping/optimize_task_state_for_flapping.test.ts @@ -0,0 +1,95 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { loggingSystemMock } from '@kbn/core-logging-server-mocks'; +import { Alert } from '../../alert'; +import { + optimizeTaskStateForFlapping, + getAlertIdsOverMaxLimit, +} from './optimize_task_state_for_flapping'; + +describe('optimizeTaskStateForFlapping', () => { + const logger = loggingSystemMock.createLogger(); + + const alert1 = new Alert('1', { meta: { flappingHistory: [true, true, true, true] } }); + const alert2 = new Alert('2', { meta: { flappingHistory: new Array(20).fill(true) } }); + const alert3 = new Alert('3', { meta: { flappingHistory: [true, true] } }); + const alert4 = new Alert('4', { + meta: { flappingHistory: new Array(16).fill(false).concat([true, true, true, true]) }, + }); + const alert5 = new Alert('5', { meta: { flappingHistory: new Array(20).fill(false) } }); + + test('should remove longest recovered alerts', () => { + const recoveredAlerts = optimizeTaskStateForFlapping( + logger, + { + '1': alert1, + '2': alert2, + '3': alert3, + }, + 2 + ); + + expect(Object.keys(recoveredAlerts)).toEqual(['1', '3']); + }); + + test('should not remove alerts if the number of recovered alerts is not over the limit', () => { + const recoveredAlerts = optimizeTaskStateForFlapping( + logger, + { + '1': alert1, + '2': alert2, + '3': alert3, + }, + 3 + ); + expect(Object.keys(recoveredAlerts)).toEqual(['1', '2', '3']); + }); + + test('should return all flapping alerts', () => { + const recoveredAlerts = optimizeTaskStateForFlapping( + logger, + { + '4': alert4, + '5': alert5, + }, + 1000 + ); + expect(Object.keys(recoveredAlerts)).toEqual(['4']); + }); + + describe('getAlertIdsOverMaxLimit', () => { + test('getAlertIdsOverMaxLimit should return longest recovered alerts', () => { + const alertIds = getAlertIdsOverMaxLimit( + logger, + { + '1': alert1, + '2': alert2, + '3': alert3, + }, + 2 + ); + expect(alertIds).toEqual(['2']); + + expect(logger.warn).toBeCalledWith( + 'Recovered alerts have exceeded the max alert limit of 2 : dropping 1 alert.' + ); + }); + + test('getAlertIdsOverMaxLimit should not return alerts if the num of recovered alerts is not at the limit', () => { + const trimmedAlerts = getAlertIdsOverMaxLimit( + logger, + { + '1': alert1, + '2': alert2, + }, + 2 + ); + expect(trimmedAlerts).toEqual([]); + }); + }); +}); diff --git a/x-pack/platform/plugins/shared/alerting/server/lib/flapping/optimize_task_state_for_flapping.ts b/x-pack/platform/plugins/shared/alerting/server/lib/flapping/optimize_task_state_for_flapping.ts new file mode 100644 index 000000000000..24735cff9c48 --- /dev/null +++ b/x-pack/platform/plugins/shared/alerting/server/lib/flapping/optimize_task_state_for_flapping.ts @@ -0,0 +1,74 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { keys, map } from 'lodash'; +import type { Logger } from '@kbn/logging'; +import type { Alert } from '../../alert'; +import type { AlertInstanceState, AlertInstanceContext } from '../../types'; + +export function optimizeTaskStateForFlapping< + State extends AlertInstanceState, + Context extends AlertInstanceContext, + RecoveryActionGroupId extends string +>( + logger: Logger, + recoveredAlerts: Record> = {}, + maxAlerts: number +): Record> { + // this is a space saving effort that will remove the oldest recovered alerts + // tracked in the task state if the number of alerts we plan to track is over the max alert limit + const alertIdsOverMaxLimit = getAlertIdsOverMaxLimit(logger, recoveredAlerts, maxAlerts); + for (const id of alertIdsOverMaxLimit) { + delete recoveredAlerts[id]; + } + + for (const id of keys(recoveredAlerts)) { + const alert = recoveredAlerts[id]; + // this is also a space saving effort that will only remove recovered alerts if they are not flapping + // and if the flapping array does not contain any state changes + const flapping = alert.getFlapping(); + const flappingHistory: boolean[] = alert.getFlappingHistory() || []; + const numStateChanges = flappingHistory.filter((f) => f).length; + if (!flapping && numStateChanges === 0) { + delete recoveredAlerts[id]; + } + } + return recoveredAlerts; +} + +export function getAlertIdsOverMaxLimit< + State extends AlertInstanceState, + Context extends AlertInstanceContext, + RecoveryActionGroupId extends string +>( + logger: Logger, + trackedRecoveredAlerts: Record>, + maxAlerts: number +) { + const alerts = map(trackedRecoveredAlerts, (alert, id) => { + return { + id, + flappingHistory: alert.getFlappingHistory() || [], + }; + }); + + let earlyRecoveredAlertIds: string[] = []; + if (alerts.length > maxAlerts) { + // alerts are sorted by age using the length of the flapping array + alerts.sort((a, b) => { + return a.flappingHistory.length - b.flappingHistory.length; + }); + + earlyRecoveredAlertIds = alerts.slice(maxAlerts).map((alert) => alert.id); + logger.warn( + `Recovered alerts have exceeded the max alert limit of ${maxAlerts} : dropping ${ + earlyRecoveredAlertIds.length + } ${earlyRecoveredAlertIds.length > 1 ? 'alerts' : 'alert'}.` + ); + } + return earlyRecoveredAlertIds; +} diff --git a/x-pack/platform/plugins/shared/alerting/server/lib/to_raw_alert_instances.test.ts b/x-pack/platform/plugins/shared/alerting/server/lib/to_raw_alert_instances.test.ts index 85ccdad6627b..5064261c6cf3 100644 --- a/x-pack/platform/plugins/shared/alerting/server/lib/to_raw_alert_instances.test.ts +++ b/x-pack/platform/plugins/shared/alerting/server/lib/to_raw_alert_instances.test.ts @@ -5,6 +5,7 @@ * 2.0. */ +import { loggingSystemMock } from '@kbn/core-logging-server-mocks'; import { keys, size } from 'lodash'; import { Alert } from '../alert'; import { toRawAlertInstances } from './to_raw_alert_instances'; @@ -12,6 +13,8 @@ import { toRawAlertInstances } from './to_raw_alert_instances'; describe('toRawAlertInstances', () => { const flapping = new Array(16).fill(false).concat([true, true, true, true]); const notFlapping = new Array(20).fill(false); + const logger = loggingSystemMock.createLogger(); + const maxAlertLimit = 1000; describe('toRawAlertInstances', () => { test('should return all active alerts', () => { @@ -19,7 +22,7 @@ describe('toRawAlertInstances', () => { '1': new Alert('1', { meta: { flappingHistory: flapping } }), '2': new Alert('2', { meta: { flappingHistory: [false, false] } }), }; - const { rawActiveAlerts } = toRawAlertInstances(activeAlerts, {}); + const { rawActiveAlerts } = toRawAlertInstances(logger, maxAlertLimit, activeAlerts, {}); expect(size(rawActiveAlerts)).toEqual(2); }); @@ -28,7 +31,12 @@ describe('toRawAlertInstances', () => { '1': new Alert('1', { meta: { flappingHistory: flapping } }), '2': new Alert('2', { meta: { flappingHistory: notFlapping } }), }; - const { rawRecoveredAlerts } = toRawAlertInstances({}, recoveredAlerts); + const { rawRecoveredAlerts } = toRawAlertInstances( + logger, + maxAlertLimit, + {}, + recoveredAlerts + ); expect(keys(rawRecoveredAlerts)).toEqual(['1', '2']); }); @@ -37,7 +45,13 @@ describe('toRawAlertInstances', () => { '1': new Alert('1', { meta: { flappingHistory: flapping } }), '2': new Alert('2', { meta: { flappingHistory: notFlapping } }), }; - const { rawRecoveredAlerts } = toRawAlertInstances({}, recoveredAlerts, true); + const { rawRecoveredAlerts } = toRawAlertInstances( + logger, + maxAlertLimit, + {}, + recoveredAlerts, + true + ); expect(keys(rawRecoveredAlerts)).toEqual(['1']); }); }); diff --git a/x-pack/platform/plugins/shared/alerting/server/lib/to_raw_alert_instances.ts b/x-pack/platform/plugins/shared/alerting/server/lib/to_raw_alert_instances.ts index 3857b2c9ef02..18874ab601ff 100644 --- a/x-pack/platform/plugins/shared/alerting/server/lib/to_raw_alert_instances.ts +++ b/x-pack/platform/plugins/shared/alerting/server/lib/to_raw_alert_instances.ts @@ -6,8 +6,10 @@ */ import { keys } from 'lodash'; +import type { Logger } from '@kbn/logging'; import type { Alert } from '../alert'; import type { AlertInstanceState, AlertInstanceContext, RawAlertInstance } from '../types'; +import { optimizeTaskStateForFlapping } from './flapping/optimize_task_state_for_flapping'; export function toRawAlertInstances< State extends AlertInstanceState, @@ -15,6 +17,8 @@ export function toRawAlertInstances< ActionGroupIds extends string, RecoveryActionGroupId extends string >( + logger: Logger, + maxAlerts: number, activeAlerts: Record> = {}, recoveredAlerts: Record> = {}, shouldOptimizeTaskState: boolean = false @@ -29,22 +33,12 @@ export function toRawAlertInstances< rawActiveAlerts[id] = activeAlerts[id].toRaw(); } - for (const id of keys(recoveredAlerts)) { - const alert = recoveredAlerts[id]; - if (shouldOptimizeTaskState) { - // this is a space saving effort that will only return recovered alerts if they are flapping - // or if the flapping array contains any state changes - const flapping = alert.getFlapping(); - const flappingHistory: boolean[] = alert.getFlappingHistory() || []; - const numStateChanges = flappingHistory.filter((f) => f).length; - if (flapping) { - rawRecoveredAlerts[id] = alert.toRaw(true); - } else if (numStateChanges > 0) { - rawRecoveredAlerts[id] = alert.toRaw(true); - } - } else { - rawRecoveredAlerts[id] = alert.toRaw(true); - } + if (shouldOptimizeTaskState) { + recoveredAlerts = optimizeTaskStateForFlapping(logger, recoveredAlerts, maxAlerts); } + for (const id of keys(recoveredAlerts)) { + rawRecoveredAlerts[id] = recoveredAlerts[id].toRaw(true); + } + return { rawActiveAlerts, rawRecoveredAlerts }; } diff --git a/x-pack/platform/test/alerting_api_integration/common/config.ts b/x-pack/platform/test/alerting_api_integration/common/config.ts index 86f7f939a0a7..8d60b7885500 100644 --- a/x-pack/platform/test/alerting_api_integration/common/config.ts +++ b/x-pack/platform/test/alerting_api_integration/common/config.ts @@ -38,6 +38,7 @@ interface CreateTestConfigOptions { experimentalFeatures?: ExperimentalConfigKeys; disabledRuleTypes?: string[]; enabledRuleTypes?: string[]; + maxAlerts?: number; } // test.not-enabled is specifically not enabled @@ -221,6 +222,7 @@ export function createTestConfig(name: string, options: CreateTestConfigOptions) enableFooterInEmail = true, maxScheduledPerMinute, experimentalFeatures = [], + maxAlerts = 20, } = options; return async ({ readConfigFile }: FtrConfigProviderContext) => { @@ -340,7 +342,7 @@ export function createTestConfig(name: string, options: CreateTestConfigOptions) '--xpack.alerting.invalidateApiKeysTask.removalDelay="1s"', '--xpack.alerting.healthCheck.interval="1s"', '--xpack.alerting.rules.minimumScheduleInterval.value="1s"', - '--xpack.alerting.rules.run.alerts.max=110', + `--xpack.alerting.rules.run.alerts.max=${maxAlerts}`, `--xpack.alerting.rules.run.actions.connectorTypeOverrides=${JSON.stringify([ { id: 'test.capped', max: '1' }, ])}`, diff --git a/x-pack/platform/test/alerting_api_integration/spaces_only/tests/alerting/group4/alerts_as_data/alerts_as_data_flapping.ts b/x-pack/platform/test/alerting_api_integration/spaces_only/tests/alerting/group4/alerts_as_data/alerts_as_data_flapping.ts index 045bd7bc99e8..647b4440244c 100644 --- a/x-pack/platform/test/alerting_api_integration/spaces_only/tests/alerting/group4/alerts_as_data/alerts_as_data_flapping.ts +++ b/x-pack/platform/test/alerting_api_integration/spaces_only/tests/alerting/group4/alerts_as_data/alerts_as_data_flapping.ts @@ -15,7 +15,10 @@ import { ALERT_FLAPPING_HISTORY, ALERT_RULE_UUID, ALERT_PENDING_RECOVERED_COUNT, + ALERT_STATUS, + ALERT_INSTANCE_ID, } from '@kbn/rule-data-utils'; +import type { IValidatedEvent } from '@kbn/event-log-plugin/server'; import type { FtrProviderContext } from '../../../../../common/ftr_provider_context'; import { Spaces } from '../../../../scenarios'; import type { TaskManagerDoc } from '../../../../../common/lib'; @@ -799,6 +802,264 @@ export default function createAlertsAsDataFlappingTest({ getService }: FtrProvid // Never flapped, since globl flapping is off expect(runWhichItFlapped).eql(0); }); + + it('should drop tracked alerts early after hitting the alert limit', async () => { + await supertest + .post(`${getUrlPrefix(Spaces.space1.id)}/internal/alerting/rules/settings/_flapping`) + .set('kbn-xsrf', 'foo') + .auth('superuser', 'superuser') + .send({ + enabled: true, + look_back_window: 6, + status_change_threshold: 4, + }) + .expect(200); + // wait so cache expires + await setTimeoutAsync(TEST_CACHE_EXPIRATION_TIME); + + const pattern = { + alertA: [true].concat(new Array(5).fill(false)), + alertB: [true].concat(new Array(5).fill(false)), + alertC: [true].concat(new Array(5).fill(false)), + alertD: [true].concat(new Array(5).fill(false)), + alertE: [true].concat(new Array(5).fill(false)), + alertF: [true].concat(new Array(5).fill(false)), + alertG: [true].concat(new Array(5).fill(false)), + alertH: [true].concat(new Array(5).fill(false)), + alertI: [true].concat(new Array(5).fill(false)), + alertJ: [true].concat(new Array(5).fill(false)), + alertK: [false, true].concat(new Array(4).fill(false)), + alertL: [false, true].concat(new Array(4).fill(false)), + alertM: [false, true].concat(new Array(4).fill(false)), + alertN: [false, true].concat(new Array(4).fill(false)), + alertO: [false, true].concat(new Array(4).fill(false)), + alertP: [false, true].concat(new Array(4).fill(false)), + alertQ: [false, true].concat(new Array(4).fill(false)), + alertR: [false, true].concat(new Array(4).fill(false)), + alertS: [false, true].concat(new Array(4).fill(false)), + alertT: [false, true].concat(new Array(4).fill(false)), + alertU: [false, true].concat(new Array(4).fill(false)), + alertV: [false, true].concat(new Array(4).fill(false)), + }; + const ruleParameters = { pattern }; + const createdRule = await supertestWithoutAuth + .post(`${getUrlPrefix(Spaces.space1.id)}/api/alerting/rule`) + .set('kbn-xsrf', 'foo') + .send( + getTestRuleData({ + rule_type_id: 'test.patternFiringAad', + schedule: { interval: '1d' }, + throttle: null, + params: ruleParameters, + actions: [], + notify_when: RuleNotifyWhen.CHANGE, + }) + ); + + expect(createdRule.status).to.eql(200); + const ruleId = createdRule.body.id; + objectRemover.add(Spaces.space1.id, ruleId, 'rule', 'alerting'); + + // -------------------------- + // RUN 1 - 10 new alerts + // -------------------------- + let events: IValidatedEvent[] = await waitForEventLogDocs( + ruleId, + new Map([['execute', { equal: 1 }]]) + ); + let executeEvent = events[0]; + let executionUuid = executeEvent?.kibana?.alert?.rule?.execution?.uuid; + expect(executionUuid).not.to.be(undefined); + + const alertDocsRun1 = await queryForAlertDocs(ruleId); + + let state: any = await getRuleState(ruleId); + expect(state.alertInstances.alertA.state.patternIndex).to.be(0); + expect(state.alertInstances.alertB.state.patternIndex).to.be(0); + expect(state.alertInstances.alertC.state.patternIndex).to.be(0); + expect(state.alertInstances.alertD.state.patternIndex).to.be(0); + expect(state.alertInstances.alertE.state.patternIndex).to.be(0); + expect(state.alertInstances.alertF.state.patternIndex).to.be(0); + expect(state.alertInstances.alertG.state.patternIndex).to.be(0); + expect(state.alertInstances.alertH.state.patternIndex).to.be(0); + expect(state.alertInstances.alertI.state.patternIndex).to.be(0); + expect(state.alertInstances.alertJ.state.patternIndex).to.be(0); + + expect(alertDocsRun1.length).to.equal(10); + + expect( + alertDocsRun1 + .filter((doc) => doc._source![ALERT_STATUS] === 'active') + .map((doc) => doc._source![ALERT_INSTANCE_ID]) + ).to.eql([ + 'alertA', + 'alertB', + 'alertC', + 'alertD', + 'alertE', + 'alertF', + 'alertG', + 'alertH', + 'alertI', + 'alertJ', + ]); + expect( + alertDocsRun1 + .filter((doc) => doc._source![ALERT_STATUS] === 'recovered') + .map((doc) => doc._source![ALERT_INSTANCE_ID]) + ).to.eql([]); + + // -------------------------- + // RUN 2 - 10 recovered, 12 new + // -------------------------- + let response = await supertestWithoutAuth + .post(`${getUrlPrefix(Spaces.space1.id)}/internal/alerting/rule/${ruleId}/_run_soon`) + .set('kbn-xsrf', 'foo'); + expect(response.status).to.eql(204); + + events = await waitForEventLogDocs(ruleId, new Map([['execute', { equal: 2 }]])); + executeEvent = events[1]; + executionUuid = executeEvent?.kibana?.alert?.rule?.execution?.uuid; + expect(executionUuid).not.to.be(undefined); + + const alertDocsRun2 = await queryForAlertDocs(ruleId); + + state = await getRuleState(ruleId); + expect(state.alertRecoveredInstances.alertA).not.to.be(undefined); + expect(state.alertRecoveredInstances.alertB).not.to.be(undefined); + expect(state.alertRecoveredInstances.alertC).not.to.be(undefined); + expect(state.alertRecoveredInstances.alertD).not.to.be(undefined); + expect(state.alertRecoveredInstances.alertE).not.to.be(undefined); + expect(state.alertRecoveredInstances.alertF).not.to.be(undefined); + expect(state.alertRecoveredInstances.alertG).not.to.be(undefined); + expect(state.alertRecoveredInstances.alertH).not.to.be(undefined); + expect(state.alertRecoveredInstances.alertI).not.to.be(undefined); + expect(state.alertRecoveredInstances.alertJ).not.to.be(undefined); + expect(state.alertInstances.alertK.state.patternIndex).to.be(1); + expect(state.alertInstances.alertL.state.patternIndex).to.be(1); + expect(state.alertInstances.alertM.state.patternIndex).to.be(1); + expect(state.alertInstances.alertN.state.patternIndex).to.be(1); + expect(state.alertInstances.alertO.state.patternIndex).to.be(1); + expect(state.alertInstances.alertP.state.patternIndex).to.be(1); + expect(state.alertInstances.alertQ.state.patternIndex).to.be(1); + expect(state.alertInstances.alertR.state.patternIndex).to.be(1); + expect(state.alertInstances.alertS.state.patternIndex).to.be(1); + expect(state.alertInstances.alertT.state.patternIndex).to.be(1); + expect(state.alertInstances.alertU.state.patternIndex).to.be(1); + expect(state.alertInstances.alertV.state.patternIndex).to.be(1); + + expect(alertDocsRun2.length).to.equal(22); + + expect( + alertDocsRun2 + .filter((doc) => doc._source![ALERT_STATUS] === 'active') + .map((doc) => doc._source![ALERT_INSTANCE_ID]) + ).to.eql([ + 'alertK', + 'alertL', + 'alertM', + 'alertN', + 'alertO', + 'alertP', + 'alertQ', + 'alertR', + 'alertS', + 'alertT', + 'alertU', + 'alertV', + ]); + expect( + alertDocsRun2 + .filter((doc) => doc._source![ALERT_STATUS] === 'recovered') + .map((doc) => doc._source![ALERT_INSTANCE_ID]) + ).to.eql([ + 'alertA', + 'alertB', + 'alertC', + 'alertD', + 'alertE', + 'alertF', + 'alertG', + 'alertH', + 'alertI', + 'alertJ', + ]); + + // -------------------------- + // RUN 3 - 22 recovered, 5 new + // -------------------------- + response = await supertestWithoutAuth + .post(`${getUrlPrefix(Spaces.space1.id)}/internal/alerting/rule/${ruleId}/_run_soon`) + .set('kbn-xsrf', 'foo'); + expect(response.status).to.eql(204); + + events = await waitForEventLogDocs(ruleId, new Map([['execute', { equal: 3 }]])); + executeEvent = events[1]; + executionUuid = executeEvent?.kibana?.alert?.rule?.execution?.uuid; + expect(executionUuid).not.to.be(undefined); + + const alertDocsRun3 = await queryForAlertDocs(ruleId); + + state = await getRuleState(ruleId); + expect(state.alertRecoveredInstances.alertA).not.to.be(undefined); + expect(state.alertRecoveredInstances.alertB).not.to.be(undefined); + expect(state.alertRecoveredInstances.alertC).not.to.be(undefined); + expect(state.alertRecoveredInstances.alertD).not.to.be(undefined); + expect(state.alertRecoveredInstances.alertE).not.to.be(undefined); + expect(state.alertRecoveredInstances.alertF).not.to.be(undefined); + expect(state.alertRecoveredInstances.alertG).not.to.be(undefined); + expect(state.alertRecoveredInstances.alertH).not.to.be(undefined); + expect(state.alertRecoveredInstances.alertK).not.to.be(undefined); + expect(state.alertRecoveredInstances.alertL).not.to.be(undefined); + expect(state.alertRecoveredInstances.alertM).not.to.be(undefined); + expect(state.alertRecoveredInstances.alertN).not.to.be(undefined); + expect(state.alertRecoveredInstances.alertO).not.to.be(undefined); + expect(state.alertRecoveredInstances.alertP).not.to.be(undefined); + expect(state.alertRecoveredInstances.alertQ).not.to.be(undefined); + expect(state.alertRecoveredInstances.alertR).not.to.be(undefined); + expect(state.alertRecoveredInstances.alertS).not.to.be(undefined); + expect(state.alertRecoveredInstances.alertT).not.to.be(undefined); + expect(state.alertRecoveredInstances.alertU).not.to.be(undefined); + expect(state.alertRecoveredInstances.alertV).not.to.be(undefined); + expect(state.alertRecoveredInstances.alertI).to.be(undefined); + expect(state.alertRecoveredInstances.alertJ).to.be(undefined); + + expect(alertDocsRun3.length).to.equal(22); + + expect( + alertDocsRun3 + .filter((doc) => doc._source![ALERT_STATUS] === 'active') + .map((doc) => doc._source![ALERT_INSTANCE_ID]) + ).to.eql([]); + expect( + alertDocsRun3 + .filter((doc) => doc._source![ALERT_STATUS] === 'recovered') + .map((doc) => doc._source![ALERT_INSTANCE_ID]) + ).to.eql([ + 'alertK', + 'alertL', + 'alertM', + 'alertN', + 'alertO', + 'alertP', + 'alertQ', + 'alertR', + 'alertS', + 'alertT', + 'alertU', + 'alertV', + 'alertA', + 'alertB', + 'alertC', + 'alertD', + 'alertE', + 'alertF', + 'alertG', + 'alertH', + 'alertI', + 'alertJ', + ]); + }); }); async function getRuleState(ruleId: string) { @@ -827,6 +1088,7 @@ export default function createAlertsAsDataFlappingTest({ getService }: FtrProvid }, }, }, + size: 25, }); return searchResult.hits.hits as Array>; } diff --git a/x-pack/platform/test/alerting_api_integration/spaces_only/tests/alerting/group4/builtin_alert_types/circuit_breaker/index.ts b/x-pack/platform/test/alerting_api_integration/spaces_only/tests/alerting/group4/builtin_alert_types/circuit_breaker/index.ts index 9c01fc0fb223..ba35b247d254 100644 --- a/x-pack/platform/test/alerting_api_integration/spaces_only/tests/alerting/group4/builtin_alert_types/circuit_breaker/index.ts +++ b/x-pack/platform/test/alerting_api_integration/spaces_only/tests/alerting/group4/builtin_alert_types/circuit_breaker/index.ts @@ -6,10 +6,16 @@ */ import type { FtrProviderContext } from '../../../../../../common/ftr_provider_context'; +import { buildUp, tearDown } from '../../../../helpers'; // eslint-disable-next-line import/no-default-export -export default function alertingCircuitBreakerTests({ loadTestFile }: FtrProviderContext) { +export default function alertingCircuitBreakerTests({ + loadTestFile, + getService, +}: FtrProviderContext) { describe('circuit_breakers', () => { + before(async () => await buildUp(getService)); + after(async () => await tearDown(getService)); /** * This tests the expected behavior for a rule type that hits the alert limit in a single execution. */ diff --git a/x-pack/platform/test/alerting_api_integration/spaces_only/tests/alerting/group4/builtin_alert_types/index.ts b/x-pack/platform/test/alerting_api_integration/spaces_only/tests/alerting/group4/builtin_alert_types/index.ts index f25edcc1a7b0..ffc2d99a3946 100644 --- a/x-pack/platform/test/alerting_api_integration/spaces_only/tests/alerting/group4/builtin_alert_types/index.ts +++ b/x-pack/platform/test/alerting_api_integration/spaces_only/tests/alerting/group4/builtin_alert_types/index.ts @@ -12,7 +12,6 @@ export default function alertingTests({ loadTestFile }: FtrProviderContext) { describe('builtin alertTypes', () => { loadTestFile(require.resolve('./long_running')); loadTestFile(require.resolve('./cancellable')); - loadTestFile(require.resolve('./circuit_breaker')); loadTestFile(require.resolve('./auto_recover')); }); } diff --git a/x-pack/platform/test/alerting_api_integration/spaces_only/tests/alerting/group4/config_with_schedule_circuit_breaker.ts b/x-pack/platform/test/alerting_api_integration/spaces_only/tests/alerting/group4/config_with_schedule_circuit_breaker.ts new file mode 100644 index 000000000000..de4cc0ecc390 --- /dev/null +++ b/x-pack/platform/test/alerting_api_integration/spaces_only/tests/alerting/group4/config_with_schedule_circuit_breaker.ts @@ -0,0 +1,25 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { createTestConfig } from '../../../../common/config'; + +export const EmailDomainsAllowed = ['example.org', 'test.com']; + +// eslint-disable-next-line import/no-default-export +export default createTestConfig('spaces_only', { + disabledPlugins: ['security'], + license: 'trial', + enableActionsProxy: false, + verificationMode: 'none', + customizeLocalHostSsl: true, + preconfiguredAlertHistoryEsIndex: true, + emailDomainsAllowed: EmailDomainsAllowed, + useDedicatedTaskRunner: true, + testFiles: [require.resolve('./builtin_alert_types/circuit_breaker')], + reportName: 'X-Pack Alerting API Integration Tests - Alerting Circuit Breaker - group4', + maxAlerts: 110, +});