mirror of
https://github.com/elastic/kibana.git
synced 2025-04-24 01:38:56 -04:00
[Alerting][Event log] Persisting duration information for active alerts in event log (#101387)
* WIP * Storing start, duration and end in alert state * Writing to event log * Updating unit tests * Adding unit tests * Fixing uuid in tests * Updating functional test * Adding functional test * Removing console logs * Fixing unit tests * PR fixes * Removing uuid from alert information Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com>
This commit is contained in:
parent
ee0efabbfc
commit
a12ff5d65d
6 changed files with 1229 additions and 108 deletions
File diff suppressed because it is too large
Load diff
|
@ -323,6 +323,12 @@ export class TaskRunner<
|
|||
alertLabel,
|
||||
});
|
||||
|
||||
trackAlertDurations({
|
||||
originalAlerts: originalAlertInstances,
|
||||
currentAlerts: instancesWithScheduledActions,
|
||||
recoveredAlerts: recoveredAlertInstances,
|
||||
});
|
||||
|
||||
generateNewAndRecoveredInstanceEvents({
|
||||
eventLogger,
|
||||
originalAlertInstances,
|
||||
|
@ -589,6 +595,61 @@ export class TaskRunner<
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Arguments accepted by `trackAlertDurations`. Each dictionary is keyed by alert id.
 */
interface TrackAlertDurationsParams<
  InstanceState extends AlertInstanceState,
  InstanceContext extends AlertInstanceContext
> {
  /** Alerts that already existed before this pass; their state supplies the stored `start`. */
  originalAlerts: Dictionary<AlertInstance<InstanceState, InstanceContext>>;
  /** Alerts that are active in this pass; they receive `start` and `duration`. */
  currentAlerts: Dictionary<AlertInstance<InstanceState, InstanceContext>>;
  /** Alerts that recovered in this pass; they receive a final `duration` and `end`. */
  recoveredAlerts: Dictionary<AlertInstance<InstanceState, InstanceContext>>;
}

/**
 * Stamps timing information into alert state, mutating the given alerts in place:
 *
 * - an id present in `currentAlerts` but not in `originalAlerts` gets `start` set to "now"
 *   (and therefore a `duration` of 0);
 * - an id present in both keeps the `start` recorded in its original state and gets
 *   `duration` updated to the time elapsed since then;
 * - every recovered alert that has a `start` gets its final `duration` and `end` set to "now".
 *
 * Durations are expressed in nanoseconds (milliseconds * 1000 * 1000). Alerts whose state
 * carries no `start` are left without `duration` / `end`.
 *
 * @param params the original, current and recovered alert dictionaries
 */
function trackAlertDurations<
  InstanceState extends AlertInstanceState,
  InstanceContext extends AlertInstanceContext
>(params: TrackAlertDurationsParams<InstanceState, InstanceContext>) {
  const { currentAlerts, originalAlerts, recoveredAlerts } = params;

  // A single timestamp is used for the whole pass so all alerts are measured consistently.
  const currentTime = new Date().toISOString();
  const currentTimeMs = new Date(currentTime).valueOf();

  // Set lookup instead of Array.includes inside the loop.
  const originalAlertIds = new Set(Object.keys(originalAlerts));

  // Elapsed time since `start` in nanoseconds, or undefined when there is no start to measure from.
  const nanosSince = (start: unknown): number | undefined =>
    start ? (currentTimeMs - new Date(start as string).valueOf()) * 1000 * 1000 : undefined;

  // Active alerts: inject start for new ones, carry start forward for existing ones,
  // and record the duration to date.
  for (const id of Object.keys(currentAlerts)) {
    const alert = currentAlerts[id];
    const start: unknown = originalAlertIds.has(id)
      ? originalAlerts[id].getState().start
      : currentTime;
    const duration = nanosSince(start);

    // Build on the alert's CURRENT state so values written during this pass are kept;
    // only the timing fields are taken from the original state.
    alert.replaceState({
      ...alert.getState(),
      ...(start ? { start } : {}),
      ...(duration !== undefined ? { duration } : {}),
    });
  }

  // Recovered alerts: record the final duration and the end time.
  for (const id of Object.keys(recoveredAlerts)) {
    const alert = recoveredAlerts[id];
    const state = alert.getState();
    const duration = nanosSince(state.start);

    alert.replaceState({
      ...state,
      // Compare against undefined: a duration of 0 is valid and must overwrite any stale value.
      ...(duration !== undefined ? { duration } : {}),
      ...(state.start ? { end: currentTime } : {}),
    });
  }
}
|
||||
|
||||
interface GenerateNewAndRecoveredInstanceEventsParams<
|
||||
InstanceState extends AlertInstanceState,
|
||||
InstanceContext extends AlertInstanceContext
|
||||
|
@ -624,38 +685,66 @@ function generateNewAndRecoveredInstanceEvents<
|
|||
for (const id of recoveredAlertInstanceIds) {
|
||||
const { group: actionGroup, subgroup: actionSubgroup } =
|
||||
recoveredAlertInstances[id].getLastScheduledActions() ?? {};
|
||||
const state = recoveredAlertInstances[id].getState();
|
||||
const message = `${params.alertLabel} instance '${id}' has recovered`;
|
||||
logInstanceEvent(id, EVENT_LOG_ACTIONS.recoveredInstance, message, actionGroup, actionSubgroup);
|
||||
logInstanceEvent(
|
||||
id,
|
||||
EVENT_LOG_ACTIONS.recoveredInstance,
|
||||
message,
|
||||
state,
|
||||
actionGroup,
|
||||
actionSubgroup
|
||||
);
|
||||
}
|
||||
|
||||
for (const id of newIds) {
|
||||
const { actionGroup, subgroup: actionSubgroup } =
|
||||
currentAlertInstances[id].getScheduledActionOptions() ?? {};
|
||||
const state = currentAlertInstances[id].getState();
|
||||
const message = `${params.alertLabel} created new instance: '${id}'`;
|
||||
logInstanceEvent(id, EVENT_LOG_ACTIONS.newInstance, message, actionGroup, actionSubgroup);
|
||||
logInstanceEvent(
|
||||
id,
|
||||
EVENT_LOG_ACTIONS.newInstance,
|
||||
message,
|
||||
state,
|
||||
actionGroup,
|
||||
actionSubgroup
|
||||
);
|
||||
}
|
||||
|
||||
for (const id of currentAlertInstanceIds) {
|
||||
const { actionGroup, subgroup: actionSubgroup } =
|
||||
currentAlertInstances[id].getScheduledActionOptions() ?? {};
|
||||
const state = currentAlertInstances[id].getState();
|
||||
const message = `${params.alertLabel} active instance: '${id}' in ${
|
||||
actionSubgroup
|
||||
? `actionGroup(subgroup): '${actionGroup}(${actionSubgroup})'`
|
||||
: `actionGroup: '${actionGroup}'`
|
||||
}`;
|
||||
logInstanceEvent(id, EVENT_LOG_ACTIONS.activeInstance, message, actionGroup, actionSubgroup);
|
||||
logInstanceEvent(
|
||||
id,
|
||||
EVENT_LOG_ACTIONS.activeInstance,
|
||||
message,
|
||||
state,
|
||||
actionGroup,
|
||||
actionSubgroup
|
||||
);
|
||||
}
|
||||
|
||||
function logInstanceEvent(
|
||||
instanceId: string,
|
||||
action: string,
|
||||
message: string,
|
||||
state: InstanceState,
|
||||
group?: string,
|
||||
subgroup?: string
|
||||
) {
|
||||
const event: IEvent = {
|
||||
event: {
|
||||
action,
|
||||
...(state?.start ? { start: state.start as string } : {}),
|
||||
...(state?.end ? { end: state.end as string } : {}),
|
||||
...(state?.duration !== undefined ? { duration: state.duration as number } : {}),
|
||||
},
|
||||
kibana: {
|
||||
alerting: {
|
||||
|
|
|
@ -149,13 +149,17 @@ export default function eventLogTests({ getService }: FtrProviderContext) {
|
|||
});
|
||||
break;
|
||||
case 'new-instance':
|
||||
validateInstanceEvent(event, `created new instance: 'instance'`);
|
||||
validateInstanceEvent(event, `created new instance: 'instance'`, false);
|
||||
break;
|
||||
case 'recovered-instance':
|
||||
validateInstanceEvent(event, `instance 'instance' has recovered`);
|
||||
validateInstanceEvent(event, `instance 'instance' has recovered`, true);
|
||||
break;
|
||||
case 'active-instance':
|
||||
validateInstanceEvent(event, `active instance: 'instance' in actionGroup: 'default'`);
|
||||
validateInstanceEvent(
|
||||
event,
|
||||
`active instance: 'instance' in actionGroup: 'default'`,
|
||||
false
|
||||
);
|
||||
break;
|
||||
// this will get triggered as we add new event actions
|
||||
default:
|
||||
|
@ -163,7 +167,11 @@ export default function eventLogTests({ getService }: FtrProviderContext) {
|
|||
}
|
||||
}
|
||||
|
||||
function validateInstanceEvent(event: IValidatedEvent, subMessage: string) {
|
||||
function validateInstanceEvent(
|
||||
event: IValidatedEvent,
|
||||
subMessage: string,
|
||||
shouldHaveEventEnd: boolean
|
||||
) {
|
||||
validateEvent(event, {
|
||||
spaceId: Spaces.space1.id,
|
||||
savedObjects: [
|
||||
|
@ -172,6 +180,7 @@ export default function eventLogTests({ getService }: FtrProviderContext) {
|
|||
message: `test.patternFiring:${alertId}: 'abc' ${subMessage}`,
|
||||
instanceId: 'instance',
|
||||
actionGroupId: 'default',
|
||||
shouldHaveEventEnd,
|
||||
});
|
||||
}
|
||||
});
|
||||
|
@ -288,10 +297,10 @@ export default function eventLogTests({ getService }: FtrProviderContext) {
|
|||
});
|
||||
break;
|
||||
case 'new-instance':
|
||||
validateInstanceEvent(event, `created new instance: 'instance'`);
|
||||
validateInstanceEvent(event, `created new instance: 'instance'`, false);
|
||||
break;
|
||||
case 'recovered-instance':
|
||||
validateInstanceEvent(event, `instance 'instance' has recovered`);
|
||||
validateInstanceEvent(event, `instance 'instance' has recovered`, true);
|
||||
break;
|
||||
case 'active-instance':
|
||||
expect(
|
||||
|
@ -299,7 +308,8 @@ export default function eventLogTests({ getService }: FtrProviderContext) {
|
|||
).to.be(true);
|
||||
validateInstanceEvent(
|
||||
event,
|
||||
`active instance: 'instance' in actionGroup(subgroup): 'default(${event?.kibana?.alerting?.action_subgroup})'`
|
||||
`active instance: 'instance' in actionGroup(subgroup): 'default(${event?.kibana?.alerting?.action_subgroup})'`,
|
||||
false
|
||||
);
|
||||
break;
|
||||
// this will get triggered as we add new event actions
|
||||
|
@ -308,7 +318,11 @@ export default function eventLogTests({ getService }: FtrProviderContext) {
|
|||
}
|
||||
}
|
||||
|
||||
function validateInstanceEvent(event: IValidatedEvent, subMessage: string) {
|
||||
function validateInstanceEvent(
|
||||
event: IValidatedEvent,
|
||||
subMessage: string,
|
||||
shouldHaveEventEnd: boolean
|
||||
) {
|
||||
validateEvent(event, {
|
||||
spaceId: Spaces.space1.id,
|
||||
savedObjects: [
|
||||
|
@ -317,6 +331,7 @@ export default function eventLogTests({ getService }: FtrProviderContext) {
|
|||
message: `test.patternFiring:${alertId}: 'abc' ${subMessage}`,
|
||||
instanceId: 'instance',
|
||||
actionGroupId: 'default',
|
||||
shouldHaveEventEnd,
|
||||
});
|
||||
}
|
||||
});
|
||||
|
@ -376,6 +391,7 @@ interface ValidateEventLogParams {
|
|||
savedObjects: SavedObject[];
|
||||
outcome?: string;
|
||||
message: string;
|
||||
shouldHaveEventEnd?: boolean;
|
||||
errorMessage?: string;
|
||||
status?: string;
|
||||
actionGroupId?: string;
|
||||
|
@ -385,7 +401,7 @@ interface ValidateEventLogParams {
|
|||
|
||||
export function validateEvent(event: IValidatedEvent, params: ValidateEventLogParams): void {
|
||||
const { spaceId, savedObjects, outcome, message, errorMessage } = params;
|
||||
const { status, actionGroupId, instanceId, reason } = params;
|
||||
const { status, actionGroupId, instanceId, reason, shouldHaveEventEnd } = params;
|
||||
|
||||
if (status) {
|
||||
expect(event?.kibana?.alerting?.status).to.be(status);
|
||||
|
@ -411,16 +427,23 @@ export function validateEvent(event: IValidatedEvent, params: ValidateEventLogPa
|
|||
if (duration !== undefined) {
|
||||
expect(typeof duration).to.be('number');
|
||||
expect(eventStart).to.be.ok();
|
||||
expect(eventEnd).to.be.ok();
|
||||
|
||||
const durationDiff = Math.abs(
|
||||
Math.round(duration! / NANOS_IN_MILLIS) - (eventEnd - eventStart)
|
||||
);
|
||||
if (shouldHaveEventEnd !== false) {
|
||||
expect(eventEnd).to.be.ok();
|
||||
|
||||
// account for rounding errors
|
||||
expect(durationDiff < 1).to.equal(true);
|
||||
expect(eventStart <= eventEnd).to.equal(true);
|
||||
expect(eventEnd <= dateNow).to.equal(true);
|
||||
const durationDiff = Math.abs(
|
||||
Math.round(duration! / NANOS_IN_MILLIS) - (eventEnd - eventStart)
|
||||
);
|
||||
|
||||
// account for rounding errors
|
||||
expect(durationDiff < 1).to.equal(true);
|
||||
expect(eventStart <= eventEnd).to.equal(true);
|
||||
expect(eventEnd <= dateNow).to.equal(true);
|
||||
}
|
||||
|
||||
if (shouldHaveEventEnd === false) {
|
||||
expect(eventEnd).not.to.be.ok();
|
||||
}
|
||||
}
|
||||
|
||||
expect(event?.event?.outcome).to.equal(outcome);
|
||||
|
|
|
@ -0,0 +1,115 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import expect from '@kbn/expect';
|
||||
import { Spaces } from '../../scenarios';
|
||||
import { getUrlPrefix, getTestAlertData, ObjectRemover, getEventLog } from '../../../common/lib';
|
||||
import { FtrProviderContext } from '../../../common/ftr_provider_context';
|
||||
import { IValidatedEvent } from '../../../../../plugins/event_log/server';
|
||||
|
||||
// eslint-disable-next-line import/no-default-export
|
||||
/**
 * Functional test: creates a `test.patternFiring` rule and checks that the alert events it
 * writes to the event log carry consistent start / end / duration information.
 */
export default function eventLogAlertTests({ getService }: FtrProviderContext) {
  const supertest = getService('supertest');
  const retry = getService('retry');

  describe('eventLog alerts', () => {
    const objectRemover = new ObjectRemover(supertest);

    after(() => objectRemover.removeAll());

    it('should generate expected alert events for normal operation', async () => {
      // pattern of when the alert should fire
      const pattern = {
        instance: [false, true, true, false, false, true, true, true],
      };

      const ruleData = getTestAlertData({
        rule_type_id: 'test.patternFiring',
        schedule: { interval: '1s' },
        throttle: null,
        params: {
          pattern,
        },
        actions: [],
      });
      const response = await supertest
        .post(`${getUrlPrefix(Spaces.space1.id)}/api/alerting/rule`)
        .set('kbn-xsrf', 'foo')
        .send(ruleData);

      expect(response.status).to.eql(200);
      const ruleId = response.body.id;
      objectRemover.add(Spaces.space1.id, ruleId, 'rule', 'alerting');

      // keep polling the event log until the expected number of events per action is present
      const events = await retry.try(async () => {
        return await getEventLog({
          getService,
          spaceId: Spaces.space1.id,
          type: 'alert',
          id: ruleId,
          provider: 'alerting',
          actions: new Map([
            ['execute', { gte: 9 }],
            ['new-instance', { equal: 2 }],
            ['active-instance', { gte: 4 }],
            ['recovered-instance', { equal: 2 }],
          ]),
        });
      });

      // only the per-alert events are of interest here; drop the execute events
      const instanceEvents = events.filter(
        (event: IValidatedEvent) => event?.event?.action !== 'execute'
      );

      // timing information of the alert span currently being walked through
      const activeSpan: {
        alertId?: string;
        start?: string;
        durationToDate?: number;
      } = {};

      for (const instanceEvent of instanceEvents) {
        const eventInfo = instanceEvent?.event;
        const alertingInfo = instanceEvent?.kibana?.alerting;

        switch (eventInfo?.action) {
          case 'new-instance':
            expect(alertingInfo?.instance_id).to.equal('instance');
            // a newly created alert has not ended yet
            expect(eventInfo?.end).to.be(undefined);

            activeSpan.alertId = alertingInfo?.instance_id;
            activeSpan.start = eventInfo?.start;
            activeSpan.durationToDate = eventInfo?.duration;
            break;

          case 'active-instance':
            expect(alertingInfo?.instance_id).to.equal('instance');
            expect(eventInfo?.start).to.equal(activeSpan.start);
            expect(eventInfo?.end).to.be(undefined);

            // the duration must keep growing while the alert stays active
            if (eventInfo?.duration! !== 0) {
              expect(eventInfo?.duration! > activeSpan.durationToDate!).to.be(true);
            }
            activeSpan.durationToDate = eventInfo?.duration;
            break;

          case 'recovered-instance': {
            expect(alertingInfo?.instance_id).to.equal('instance');
            expect(eventInfo?.start).to.equal(activeSpan.start);
            expect(eventInfo?.end).not.to.be(undefined);

            // end - start (milliseconds) must match the reported duration (nanoseconds)
            const elapsedMs =
              new Date(eventInfo?.end!).valueOf() - new Date(eventInfo?.start!).valueOf();
            expect(elapsedMs).to.equal(eventInfo?.duration! / 1000 / 1000);
            break;
          }
        }
      }
    });
  });
}
|
|
@ -76,7 +76,9 @@ export default function createGetAlertStateTests({ getService }: FtrProviderCont
|
|||
expect(alertInstances.length).to.eql(response.body.rule_type_state.runCount);
|
||||
alertInstances.forEach(([key, value], index) => {
|
||||
expect(key).to.eql(`instance-${index}`);
|
||||
expect(value.state).to.eql({ instanceStateValue: true });
|
||||
expect(value.state.instanceStateValue).to.be(true);
|
||||
expect(value.state.start).not.to.be(undefined);
|
||||
expect(value.state.duration).not.to.be(undefined);
|
||||
});
|
||||
});
|
||||
|
||||
|
@ -131,7 +133,9 @@ export default function createGetAlertStateTests({ getService }: FtrProviderCont
|
|||
expect(alertInstances.length).to.eql(response.body.rule_type_state.runCount);
|
||||
alertInstances.forEach(([key, value], index) => {
|
||||
expect(key).to.eql(`instance-${index}`);
|
||||
expect(value.state).to.eql({ instanceStateValue: true });
|
||||
expect(value.state.instanceStateValue).to.be(true);
|
||||
expect(value.state.start).not.to.be(undefined);
|
||||
expect(value.state.duration).not.to.be(undefined);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
@ -37,6 +37,7 @@ export default function alertingTests({ loadTestFile, getService }: FtrProviderC
|
|||
loadTestFile(require.resolve('./builtin_alert_types'));
|
||||
loadTestFile(require.resolve('./mustache_templates.ts'));
|
||||
loadTestFile(require.resolve('./notify_when'));
|
||||
loadTestFile(require.resolve('./event_log_alerts'));
|
||||
|
||||
// note that this test will destroy existing spaces
|
||||
loadTestFile(require.resolve('./migrations'));
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue