[8.x] Store gaps info in event log (#192347) (#208936)

# Backport

This will backport the following commits from `main` to `8.x`:
- [Store gaps info in event log
(#192347)](https://github.com/elastic/kibana/pull/192347)

<!--- Backport version: 9.6.4 -->

### Questions ?
Please refer to the [Backport tool
documentation](https://github.com/sorenlouv/backport)

<!--BACKPORT [{"author":{"name":"Khristinin
Nikita","email":"nikita.khristinin@elastic.co"},"sourceCommit":{"committedDate":"2025-01-27T09:46:06Z","message":"Store
gaps info in event log (#192347)\n\n## Store gaps in event
log\r\n\r\nSetting new fields hidden behind feature
flag\r\n(`storeGapsInEventLogEnabled`). It should be possible to do
intermediate\r\nrelease, just to update mappings.\r\n\r\nIn this
PR:\r\n\r\n- store gaps in event log\r\n- update event log mappings\r\n-
add gap range and unfilled_gap_ms mappings into rule
`last_run`\r\nmetrics for rule schema\r\n\r\n\r\n### How it
works\r\n\r\nIn this stage we use our current mechanism for gap
detection in security\r\nrules. Instead of just report duration in ms,
we will also report date\r\nrange of the gap. To set this value we use
`RuleMonitoringService` from\r\nthe security rule
execution.\r\n\r\nLater in task runner we check if last run from
`RuleMonitoringService`\r\nhas gap range information. If it there, we
use `AlertingEventLogger` to\r\ncreate Gap document in event
log.\r\n\r\n\r\n\r\nNew field in event log in `kibana.alert.rule.gap`
fields:\r\n`status`: keyword. Gap status \r\n`range`: date_range. Full
gap range\r\n`filled_intervals`: date_range. Filled
intervals\r\n`unfilled_intervals`: date_range. Unfilled
intervals\r\n`in_progress_intervals`: date_range Intervals which
currently have\r\npotential rule executions planned (from manual rule
run)\r\n`total_gap_duration_ms`: long\r\n`filled_gap_duration_ms`:
long\r\n`unfilled_gap_duration_ms`:
long\r\n`in_progress_gap_duration_ms`:
long\r\n\r\n**kibana.alert.rule.execution**: extend execution info with
gap_range\r\n`kibana.alert.rule.execution.gap_range`\r\n\r\n\r\n## Rule
schema\r\n\r\nAdded `unfilled_gap_ms` into rule response schema. We will
calculate\r\ntotal gap duration after each rule execution, and write in
to rule\r\nobject for 1d,3d,7d. It's allow us to use it in Rule
monitoring table,\r\nand be able sort by that field\r\n\r\nI plan add
integrations tests, in next PR's when we will have API to get\r\ngaps
info\r\n\r\n\r\n### How this schema will be used:\r\nMore
details\r\n[here](https://docs.google.com/document/d/1pDJZon-OzUwmk1ENNZJo0QUoSD-MCwy_tn9GCCBX_1Y/edit?tab=t.0)\r\n\r\n---------\r\n\r\nCo-authored-by:
Elastic Machine
<elasticmachine@users.noreply.github.com>\r\nCo-authored-by:
kibanamachine
<42973632+kibanamachine@users.noreply.github.com>","sha":"41aa8f949a2ee9362a378b39b6313135493e6eda","branchLabelMapping":{"^v9.0.0$":"main","^v8.18.0$":"8.x","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["release_note:skip","backport
missing","v9.0.0","backport:prev-minor"],"title":"Store gaps info in
event
log","number":192347,"url":"https://github.com/elastic/kibana/pull/192347","mergeCommit":{"message":"Store
gaps info in event log (#192347)\n\n## Store gaps in event
log\r\n\r\nSetting new fields hidden behind feature
flag\r\n(`storeGapsInEventLogEnabled`). It should be possible to do
intermediate\r\nrelease, just to update mappings.\r\n\r\nIn this
PR:\r\n\r\n- store gaps in event log\r\n- update event log mappings\r\n-
add gap range and unfilled_gap_ms mappings into rule
`last_run`\r\nmetrics for rule schema\r\n\r\n\r\n### How it
works\r\n\r\nIn this stage we use our current mechanism for gap
detection in security\r\nrules. Instead of just report duration in ms,
we will also report date\r\nrange of the gap. To set this value we use
`RuleMonitoringService` from\r\nthe security rule
execution.\r\n\r\nLater in task runner we check if last run from
`RuleMonitoringService`\r\nhas gap range information. If it there, we
use `AlertingEventLogger` to\r\ncreate Gap document in event
log.\r\n\r\n\r\n\r\nNew field in event log in `kibana.alert.rule.gap`
fields:\r\n`status`: keyword. Gap status \r\n`range`: date_range. Full
gap range\r\n`filled_intervals`: date_range. Filled
intervals\r\n`unfilled_intervals`: date_range. Unfilled
intervals\r\n`in_progress_intervals`: date_range Intervals which
currently have\r\npotential rule executions planned (from manual rule
run)\r\n`total_gap_duration_ms`: long\r\n`filled_gap_duration_ms`:
long\r\n`unfilled_gap_duration_ms`:
long\r\n`in_progress_gap_duration_ms`:
long\r\n\r\n**kibana.alert.rule.execution**: extend execution info with
gap_range\r\n`kibana.alert.rule.execution.gap_range`\r\n\r\n\r\n## Rule
schema\r\n\r\nAdded `unfilled_gap_ms` into rule response schema. We will
calculate\r\ntotal gap duration after each rule execution, and write in
to rule\r\nobject for 1d,3d,7d. It's allow us to use it in Rule
monitoring table,\r\nand be able sort by that field\r\n\r\nI plan add
integrations tests, in next PR's when we will have API to get\r\ngaps
info\r\n\r\n\r\n### How this schema will be used:\r\nMore
details\r\n[here](https://docs.google.com/document/d/1pDJZon-OzUwmk1ENNZJo0QUoSD-MCwy_tn9GCCBX_1Y/edit?tab=t.0)\r\n\r\n---------\r\n\r\nCo-authored-by:
Elastic Machine
<elasticmachine@users.noreply.github.com>\r\nCo-authored-by:
kibanamachine
<42973632+kibanamachine@users.noreply.github.com>","sha":"41aa8f949a2ee9362a378b39b6313135493e6eda"}},"sourceBranch":"main","suggestedTargetBranches":[],"targetPullRequestStates":[{"branch":"main","label":"v9.0.0","branchLabelMappingKey":"^v9.0.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/192347","number":192347,"mergeCommit":{"message":"Store
gaps info in event log (#192347)\n\n## Store gaps in event
log\r\n\r\nSetting new fields hidden behind feature
flag\r\n(`storeGapsInEventLogEnabled`). It should be possible to do
intermediate\r\nrelease, just to update mappings.\r\n\r\nIn this
PR:\r\n\r\n- store gaps in event log\r\n- update event log mappings\r\n-
add gap range and unfilled_gap_ms mappings into rule
`last_run`\r\nmetrics for rule schema\r\n\r\n\r\n### How it
works\r\n\r\nIn this stage we use our current mechanism for gap
detection in security\r\nrules. Instead of just report duration in ms,
we will also report date\r\nrange of the gap. To set this value we use
`RuleMonitoringService` from\r\nthe security rule
execution.\r\n\r\nLater in task runner we check if last run from
`RuleMonitoringService`\r\nhas gap range information. If it there, we
use `AlertingEventLogger` to\r\ncreate Gap document in event
log.\r\n\r\n\r\n\r\nNew field in event log in `kibana.alert.rule.gap`
fields:\r\n`status`: keyword. Gap status \r\n`range`: date_range. Full
gap range\r\n`filled_intervals`: date_range. Filled
intervals\r\n`unfilled_intervals`: date_range. Unfilled
intervals\r\n`in_progress_intervals`: date_range Intervals which
currently have\r\npotential rule executions planned (from manual rule
run)\r\n`total_gap_duration_ms`: long\r\n`filled_gap_duration_ms`:
long\r\n`unfilled_gap_duration_ms`:
long\r\n`in_progress_gap_duration_ms`:
long\r\n\r\n**kibana.alert.rule.execution**: extend execution info with
gap_range\r\n`kibana.alert.rule.execution.gap_range`\r\n\r\n\r\n## Rule
schema\r\n\r\nAdded `unfilled_gap_ms` into rule response schema. We will
calculate\r\ntotal gap duration after each rule execution, and write in
to rule\r\nobject for 1d,3d,7d. It's allow us to use it in Rule
monitoring table,\r\nand be able sort by that field\r\n\r\nI plan add
integrations tests, in next PR's when we will have API to get\r\ngaps
info\r\n\r\n\r\n### How this schema will be used:\r\nMore
details\r\n[here](https://docs.google.com/document/d/1pDJZon-OzUwmk1ENNZJo0QUoSD-MCwy_tn9GCCBX_1Y/edit?tab=t.0)\r\n\r\n---------\r\n\r\nCo-authored-by:
Elastic Machine
<elasticmachine@users.noreply.github.com>\r\nCo-authored-by:
kibanamachine
<42973632+kibanamachine@users.noreply.github.com>","sha":"41aa8f949a2ee9362a378b39b6313135493e6eda"}}]}]
BACKPORT-->
This commit is contained in:
Khristinin Nikita 2025-01-30 16:08:09 +01:00 committed by GitHub
parent 65c15e79a2
commit f29b8d51d2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
39 changed files with 771 additions and 13 deletions

View file

@ -1850,6 +1850,25 @@
"nullable": true,
"type": "number"
},
"gap_range": {
"additionalProperties": false,
"nullable": true,
"properties": {
"gte": {
"description": "End of the gap range.",
"type": "string"
},
"lte": {
"description": "Start of the gap range.",
"type": "string"
}
},
"required": [
"lte",
"gte"
],
"type": "object"
},
"total_alerts_created": {
"description": "Total number of alerts created during last rule run.",
"nullable": true,
@ -3024,6 +3043,25 @@
"nullable": true,
"type": "number"
},
"gap_range": {
"additionalProperties": false,
"nullable": true,
"properties": {
"gte": {
"description": "End of the gap range.",
"type": "string"
},
"lte": {
"description": "Start of the gap range.",
"type": "string"
}
},
"required": [
"lte",
"gte"
],
"type": "object"
},
"total_alerts_created": {
"description": "Total number of alerts created during last rule run.",
"nullable": true,
@ -4181,6 +4219,25 @@
"nullable": true,
"type": "number"
},
"gap_range": {
"additionalProperties": false,
"nullable": true,
"properties": {
"gte": {
"description": "End of the gap range.",
"type": "string"
},
"lte": {
"description": "Start of the gap range.",
"type": "string"
}
},
"required": [
"lte",
"gte"
],
"type": "object"
},
"total_alerts_created": {
"description": "Total number of alerts created during last rule run.",
"nullable": true,
@ -5535,6 +5592,25 @@
"nullable": true,
"type": "number"
},
"gap_range": {
"additionalProperties": false,
"nullable": true,
"properties": {
"gte": {
"description": "End of the gap range.",
"type": "string"
},
"lte": {
"description": "Start of the gap range.",
"type": "string"
}
},
"required": [
"lte",
"gte"
],
"type": "object"
},
"total_alerts_created": {
"description": "Total number of alerts created during last rule run.",
"nullable": true,

View file

@ -1168,6 +1168,20 @@ paths:
description: Duration in seconds of rule run gap.
nullable: true
type: number
gap_range:
additionalProperties: false
nullable: true
type: object
properties:
gte:
description: End of the gap range.
type: string
lte:
description: Start of the gap range.
type: string
required:
- lte
- gte
total_alerts_created:
description: Total number of alerts created during last rule run.
nullable: true
@ -2081,6 +2095,20 @@ paths:
description: Duration in seconds of rule run gap.
nullable: true
type: number
gap_range:
additionalProperties: false
nullable: true
type: object
properties:
gte:
description: End of the gap range.
type: string
lte:
description: Start of the gap range.
type: string
required:
- lte
- gte
total_alerts_created:
description: Total number of alerts created during last rule run.
nullable: true
@ -2969,6 +2997,20 @@ paths:
description: Duration in seconds of rule run gap.
nullable: true
type: number
gap_range:
additionalProperties: false
nullable: true
type: object
properties:
gte:
description: End of the gap range.
type: string
lte:
description: Start of the gap range.
type: string
required:
- lte
- gte
total_alerts_created:
description: Total number of alerts created during last rule run.
nullable: true
@ -4017,6 +4059,20 @@ paths:
description: Duration in seconds of rule run gap.
nullable: true
type: number
gap_range:
additionalProperties: false
nullable: true
type: object
properties:
gte:
description: End of the gap range.
type: string
lte:
description: Start of the gap range.
type: string
required:
- lte
- gte
total_alerts_created:
description: Total number of alerts created during last rule run.
nullable: true
@ -44573,6 +44629,19 @@ components:
description: Duration in seconds of execution gap
minimum: 0
type: integer
gap_range:
description: Range of the execution gap
type: object
properties:
gte:
description: Start date of the execution gap
type: string
lte:
description: End date of the execution gap
type: string
required:
- gte
- lte
total_enrichment_duration_ms:
description: Total time spent enriching documents during current rule execution cycle
minimum: 0

View file

@ -1821,6 +1821,20 @@ paths:
description: Duration in seconds of rule run gap.
nullable: true
type: number
gap_range:
additionalProperties: false
nullable: true
type: object
properties:
gte:
description: End of the gap range.
type: string
lte:
description: Start of the gap range.
type: string
required:
- lte
- gte
total_alerts_created:
description: Total number of alerts created during last rule run.
nullable: true
@ -2725,6 +2739,20 @@ paths:
description: Duration in seconds of rule run gap.
nullable: true
type: number
gap_range:
additionalProperties: false
nullable: true
type: object
properties:
gte:
description: End of the gap range.
type: string
lte:
description: Start of the gap range.
type: string
required:
- lte
- gte
total_alerts_created:
description: Total number of alerts created during last rule run.
nullable: true
@ -3604,6 +3632,20 @@ paths:
description: Duration in seconds of rule run gap.
nullable: true
type: number
gap_range:
additionalProperties: false
nullable: true
type: object
properties:
gte:
description: End of the gap range.
type: string
lte:
description: Start of the gap range.
type: string
required:
- lte
- gte
total_alerts_created:
description: Total number of alerts created during last rule run.
nullable: true
@ -4580,6 +4622,20 @@ paths:
description: Duration in seconds of rule run gap.
nullable: true
type: number
gap_range:
additionalProperties: false
nullable: true
type: object
properties:
gte:
description: End of the gap range.
type: string
lte:
description: Start of the gap range.
type: string
required:
- lte
- gte
total_alerts_created:
description: Total number of alerts created during last rule run.
nullable: true
@ -32940,6 +32996,19 @@ components:
description: Duration in seconds of execution gap
minimum: 0
type: integer
gap_range:
description: Range of the execution gap
type: object
properties:
gte:
description: Start date of the execution gap
type: string
lte:
description: End date of the execution gap
type: string
required:
- gte
- lte
total_enrichment_duration_ms:
description: Total time spent enriching documents during current rule execution cycle
minimum: 0

View file

@ -59,7 +59,7 @@ describe('checking migration metadata changes on all registered SO types', () =>
"action": "0e6fc0b74c7312a8c11ff6b14437b93a997358b8",
"action_task_params": "2e475d8b62e2de50b77f58cda309efb537e1d543",
"ad_hoc_run_params": "c7419760e878207231c3c8a25ec4d78360e07bf7",
"alert": "556a03378f5ee1c31593c3a37c66b54555ee14ff",
"alert": "c5a135d2aca71f56103e9ccba00d6675b0586c82",
"api_key_pending_invalidation": "8f5554d1984854011b8392d9a6f7ef985bcac03c",
"apm-custom-dashboards": "b67128f78160c288bd7efe25b2da6e2afd5e82fc",
"apm-indices": "8a2d68d415a4b542b26b0d292034a28ffac6fed4",

View file

@ -144,6 +144,10 @@ export interface RuleMonitoringLastRunMetrics extends SavedObjectAttributes {
total_alerts_detected?: number | null;
total_alerts_created?: number | null;
gap_duration_s?: number | null;
gap_range?: {
gte: string;
lte: string;
} | null;
}
export interface RuleMonitoringLastRun extends SavedObjectAttributes {

View file

@ -407,6 +407,18 @@ export const monitoringSchema = schema.object(
})
)
),
gap_range: schema.maybe(
schema.nullable(
schema.object({
lte: schema.string({
meta: { description: 'Start of the gap range.' },
}),
gte: schema.string({
meta: { description: 'End of the gap range.' },
}),
})
)
),
}),
}),
},

View file

@ -106,4 +106,8 @@ export interface RuleMonitoringLastRunMetrics extends SavedObjectAttributes {
total_alerts_detected?: number | null;
total_alerts_created?: number | null;
gap_duration_s?: number | null;
gap_range?: {
lte: string;
gte: string;
} | null;
}

View file

@ -125,6 +125,14 @@ export const monitoringSchema = schema.object({
total_alerts_detected: schema.maybe(schema.nullable(schema.number())),
total_alerts_created: schema.maybe(schema.nullable(schema.number())),
gap_duration_s: schema.maybe(schema.nullable(schema.number())),
gap_range: schema.maybe(
schema.nullable(
schema.object({
gte: schema.string(),
lte: schema.string(),
})
)
),
}),
}),
}),

View file

@ -19,6 +19,7 @@ const createAlertingEventLoggerMock = () => {
logAlert: jest.fn(),
logAction: jest.fn(),
done: jest.fn(),
reportGap: jest.fn(),
};
});
};

View file

@ -22,6 +22,7 @@ import {
initializeExecuteBackfillRecord,
SavedObjects,
updateEventWithRuleData,
createGapRecord,
} from './alerting_event_logger';
import { UntypedNormalizedRuleType } from '../../rule_type_registry';
import {
@ -1457,6 +1458,32 @@ describe('AlertingEventLogger', () => {
expect(alertingEventLogger.getEvent()!.message).toBe('first failure message');
});
});
describe('reportGap()', () => {
test('should throw error if alertingEventLogger has not been initialized', () => {
expect(() =>
alertingEventLogger.reportGap({
gap: { gte: '', lte: '' },
})
).toThrowErrorMatchingInlineSnapshot(`"AlertingEventLogger not initialized"`);
});
test('should report gap event', () => {
alertingEventLogger.initialize({ context: ruleContext, runDate, ruleData });
const range = {
gte: '2022-05-05T15:59:54.480Z',
lte: '2022-05-05T16:59:54.480Z',
};
alertingEventLogger.reportGap({ gap: range });
const event = createGapRecord(ruleContext, ruleData, [alertSO], {
status: 'unfilled',
range,
});
expect(eventLogger.logEvent).toHaveBeenCalledWith(event);
});
});
});
describe('helper functions', () => {
@ -2065,6 +2092,59 @@ describe('helper functions', () => {
});
});
});
describe('createGapRecord', () => {
test('should populate expected fields in event log record', () => {
const record = createGapRecord(ruleContextWithScheduleDelay, ruleDataWithName, [alertSO], {
status: 'unfilled',
range: {
gte: '2022-05-05T15:59:54.480Z',
lte: '2022-05-05T16:59:54.480Z',
},
});
// these fields should be explicitly set
expect(record.event?.action).toEqual('gap');
expect(record.event?.kind).toEqual('alert');
expect(record.event?.category).toEqual([ruleDataWithName.type?.producer]);
expect(record.kibana?.alert?.rule?.gap?.status).toEqual('unfilled');
expect(record.kibana?.alert?.rule?.gap?.range?.gte).toEqual('2022-05-05T15:59:54.480Z');
expect(record.kibana?.alert?.rule?.gap?.range?.lte).toEqual('2022-05-05T16:59:54.480Z');
expect(record.kibana?.alert?.rule?.rule_type_id).toEqual(ruleDataWithName.type?.id);
expect(record.kibana?.alert?.rule?.consumer).toEqual(ruleDataWithName.consumer);
expect(record.kibana?.alert?.rule?.execution?.uuid).toEqual(
ruleContextWithScheduleDelay.executionId
);
expect(record.kibana?.saved_objects).toEqual([
{
id: ruleContextWithScheduleDelay.savedObjectId,
type: ruleContextWithScheduleDelay.savedObjectType,
type_id: ruleDataWithName.type?.id,
rel: SAVED_OBJECT_REL_PRIMARY,
},
]);
expect(record.kibana?.space_ids).toEqual([ruleContextWithScheduleDelay.spaceId]);
expect(record?.rule?.id).toEqual(ruleDataWithName.id);
expect(record?.rule?.license).toEqual(ruleDataWithName.type?.minimumLicenseRequired);
expect(record?.rule?.category).toEqual(ruleDataWithName.type?.id);
expect(record?.rule?.ruleset).toEqual(ruleDataWithName.type?.producer);
expect(record?.rule?.name).toEqual(ruleDataWithName.name);
// these fields should not be set by this function
expect(record.kibana?.alert?.rule?.gap?.filled_intervals).toEqual(undefined);
expect(record['@timestamp']).toBeUndefined();
expect(record.event?.provider).toBeUndefined();
expect(record.event?.outcome).toBeUndefined();
expect(record.kibana?.alert?.rule?.execution?.metrics).toBeUndefined();
expect(record.kibana?.server_uuid).toBeUndefined();
expect(record.kibana?.task).toBeUndefined();
expect(record.kibana?.version).toBeUndefined();
expect(record?.ecs).toBeUndefined();
});
});
});
function mockEventLoggerStartTiming() {

View file

@ -395,6 +395,26 @@ export class AlertingEventLogger {
this.eventLogger.logEvent(this.event);
}
public reportGap({
gap,
}: {
gap: {
lte: string;
gte: string;
};
}): void {
if (!this.isInitialized || !this.context || !this.ruleData) {
throw new Error('AlertingEventLogger not initialized');
}
this.eventLogger.logEvent(
createGapRecord(this.context, this.ruleData, this.relatedSavedObjects, {
status: 'unfilled',
range: gap,
})
);
}
}
export function createAlertRecord(
@ -488,6 +508,33 @@ export function createExecuteTimeoutRecord(
});
}
export function createGapRecord(
context: ContextOpts,
ruleData: RuleContext,
savedObjects: SavedObjects[],
gap: {
status: string;
range: {
gte: string;
lte: string;
};
}
) {
return createAlertEventLogRecordObject({
ruleId: ruleData?.id,
ruleType: ruleData?.type,
consumer: ruleData?.consumer,
namespace: context.namespace,
spaceId: context.spaceId,
executionId: context.executionId,
action: EVENT_LOG_ACTIONS.gap,
savedObjects,
ruleName: ruleData?.name,
ruleRevision: ruleData?.revision,
gap,
});
}
export function initializeExecuteRecord(
context: Context,
ruleData: RuleContext,

View file

@ -44,6 +44,13 @@ interface CreateAlertEventLogRecordParams {
};
maintenanceWindowIds?: string[];
ruleRevision?: number;
gap?: {
status: string;
range: {
gte: string;
lte: string;
};
};
}
export function createAlertEventLogRecordObject(params: CreateAlertEventLogRecordParams): Event {
@ -64,6 +71,7 @@ export function createAlertEventLogRecordObject(params: CreateAlertEventLogRecor
alertSummary,
maintenanceWindowIds,
ruleRevision,
gap,
} = params;
const alerting =
params.instanceId || group || alertSummary
@ -109,6 +117,7 @@ export function createAlertEventLogRecordObject(params: CreateAlertEventLogRecor
},
}
: {}),
...(gap ? { gap } : {}),
},
},
...(alerting ? alerting : {}),

View file

@ -21,6 +21,8 @@ const INITIAL_LAST_RUN_METRICS: RuleMonitoringLastRunMetrics = {
total_alerts_detected: null,
total_alerts_created: null,
gap_duration_s: null,
// TODO: should initialised field, after inermidiate release
// gap_range: null,
};
export const getDefaultMonitoring = (timestamp: string): RawRuleMonitoring => {

View file

@ -25,6 +25,7 @@ function createPublicRuleMonitoringServiceMock() {
setLastRunMetricsTotalAlertsDetected: jest.fn(),
setLastRunMetricsTotalIndexingDurationMs: jest.fn(),
setLastRunMetricsTotalSearchDurationMs: jest.fn(),
setLastRunMetricsGapRange: jest.fn(),
};
});
}

View file

@ -171,5 +171,22 @@ describe('RuleMonitoringService', () => {
} = ruleMonitoringService.getMonitoring();
expect(metrics.gap_duration_s).toEqual(567);
});
it('should set gapRange', () => {
const ruleMonitoringService = new RuleMonitoringService();
const { setLastRunMetricsGapRange } = ruleMonitoringService.getLastRunMetricsSetters();
setLastRunMetricsGapRange({
gte: '2020-01-01T00:00:00.000Z',
lte: '2020-01-01T01:00:00.000Z',
});
const {
run: {
last_run: { metrics },
},
} = ruleMonitoringService.getMonitoring();
expect(metrics.gap_range?.gte).toEqual('2020-01-01T00:00:00.000Z');
expect(metrics.gap_range?.lte).toEqual('2020-01-01T01:00:00.000Z');
});
});
});

View file

@ -63,6 +63,7 @@ export class RuleMonitoringService {
setLastRunMetricsTotalAlertsDetected: this.setLastRunMetricsTotalAlertsDetected.bind(this),
setLastRunMetricsTotalAlertsCreated: this.setLastRunMetricsTotalAlertsCreated.bind(this),
setLastRunMetricsGapDurationS: this.setLastRunMetricsGapDurationS.bind(this),
setLastRunMetricsGapRange: this.setLastRunMetricsGapRange.bind(this),
};
}
@ -86,6 +87,10 @@ export class RuleMonitoringService {
this.monitoring.run.last_run.metrics.gap_duration_s = gapDurationS;
}
private setLastRunMetricsGapRange(gap: { lte: string; gte: string } | null) {
this.monitoring.run.last_run.metrics.gap_range = gap;
}
private buildExecutionSuccessRatio() {
const { history } = this.monitoring.run;
return history.filter(({ success }) => success).length / history.length;

View file

@ -129,6 +129,7 @@ export const EVENT_LOG_ACTIONS = {
activeInstance: 'active-instance',
executeTimeout: 'execute-timeout',
untrackedInstance: 'untracked-instance',
gap: 'gap',
};
export const LEGACY_EVENT_LOG_ACTIONS = {
resolvedInstance: 'resolved-instance',

View file

@ -6,7 +6,12 @@
*/
import { SavedObjectsModelVersionMap } from '@kbn/core-saved-objects-server';
import { rawRuleSchemaV1, rawRuleSchemaV2, rawRuleSchemaV3 } from '../schemas/raw_rule';
import {
rawRuleSchemaV1,
rawRuleSchemaV2,
rawRuleSchemaV3,
rawRuleSchemaV4,
} from '../schemas/raw_rule';
export const ruleModelVersions: SavedObjectsModelVersionMap = {
'1': {
@ -30,4 +35,11 @@ export const ruleModelVersions: SavedObjectsModelVersionMap = {
create: rawRuleSchemaV3,
},
},
'4': {
changes: [],
schemas: {
forwardCompatibility: rawRuleSchemaV4.extends({}, { unknowns: 'ignore' }),
create: rawRuleSchemaV4,
},
},
};

View file

@ -10,3 +10,4 @@ export * from './latest';
export { rawRuleSchema as rawRuleSchemaV1 } from './v1';
export { rawRuleSchema as rawRuleSchemaV2 } from './v2';
export { rawRuleSchema as rawRuleSchemaV3 } from './v3';
export { rawRuleSchema as rawRuleSchemaV4 } from './v4';

View file

@ -11,10 +11,10 @@ import {
rawRuleActionSchema,
rawRuleAlertsFilterSchema,
rawRuleLastRunSchema,
rawRuleMonitoringSchema,
rawRuleSchema,
} from './v3';
import { rawRuleMonitoringSchema, rawRuleSchema } from './v4';
type Mutable<T> = { -readonly [P in keyof T]: T[P] extends object ? Mutable<T[P]> : T[P] };
export type RawRuleAction = Mutable<TypeOf<typeof rawRuleActionSchema>>;

View file

@ -0,0 +1,57 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { schema } from '@kbn/config-schema';
import { rawRuleSchema as rawRuleSchemaV3 } from './v3';
const outcome = schema.oneOf([
schema.literal('succeeded'),
schema.literal('warning'),
schema.literal('failed'),
]);
export const rawRuleMonitoringSchema = schema.object({
run: schema.object({
history: schema.arrayOf(
schema.object({
success: schema.boolean(),
timestamp: schema.number(),
duration: schema.maybe(schema.number()),
outcome: schema.maybe(outcome),
})
),
calculated_metrics: schema.object({
p50: schema.maybe(schema.number()),
p95: schema.maybe(schema.number()),
p99: schema.maybe(schema.number()),
success_ratio: schema.number(),
}),
last_run: schema.object({
timestamp: schema.string(),
metrics: schema.object({
duration: schema.maybe(schema.number()),
total_search_duration_ms: schema.maybe(schema.nullable(schema.number())),
total_indexing_duration_ms: schema.maybe(schema.nullable(schema.number())),
total_alerts_detected: schema.maybe(schema.nullable(schema.number())),
total_alerts_created: schema.maybe(schema.nullable(schema.number())),
gap_duration_s: schema.maybe(schema.nullable(schema.number())),
gap_range: schema.maybe(
schema.nullable(
schema.object({
gte: schema.string(),
lte: schema.string(),
})
)
),
}),
}),
}),
});
export const rawRuleSchema = rawRuleSchemaV3.extends({
monitoring: schema.maybe(rawRuleMonitoringSchema),
});

View file

@ -91,6 +91,7 @@ import { alertsClientMock } from '../alerts_client/alerts_client.mock';
import { RULE_SAVED_OBJECT_TYPE } from '../saved_objects';
import { getErrorSource } from '@kbn/task-manager-plugin/server/task_running';
import { RuleResultService } from '../monitoring/rule_result_service';
import { RuleMonitoringService } from '../monitoring/rule_monitoring_service';
import { ruleResultServiceMock } from '../monitoring/rule_result_service.mock';
import { backfillClientMock } from '../backfill_client/backfill_client.mock';
import { UntypedNormalizedRuleType } from '../rule_type_registry';
@ -3426,6 +3427,43 @@ describe('Task Runner', () => {
expect(getErrorSource(runnerResult.taskRunError as Error)).toBe(TaskErrorSource.USER);
});
test('when there is a gap, report it to alert event log', async () => {
encryptedSavedObjectsClient.getDecryptedAsInternalUser.mockResolvedValue(mockedRawRuleSO);
jest.spyOn(RuleMonitoringService.prototype, 'getMonitoring').mockImplementation(() => {
return {
run: {
history: [],
calculated_metrics: {
success_ratio: 0,
},
last_run: {
timestamp: '2021-09-01T00:00:00.000Z',
metrics: {
gap_range: {
gte: '2021-09-01T00:00:00.000Z',
lte: '2021-09-01T00:00:00.001Z',
},
},
},
},
};
});
const taskRunner = new TaskRunner({
ruleType,
taskInstance: mockedTaskInstance,
context: taskRunnerFactoryInitializerParams,
inMemoryMetrics,
internalSavedObjectsRepository,
});
await taskRunner.run();
expect(alertingEventLogger.reportGap).toHaveBeenCalledWith({
gap: { gte: '2021-09-01T00:00:00.000Z', lte: '2021-09-01T00:00:00.001Z' },
});
});
test('reschedules when persistAlerts returns a cluster_block_exception', async () => {
const err = new ErrorWithType({
message: 'Index is blocked',

View file

@ -539,7 +539,9 @@ export class TaskRunner<
// Set rule monitoring data
this.ruleMonitoring.setMonitoring(runRuleParams.rule.monitoring);
if (this.ruleMonitoring.getMonitoring()?.run?.last_run?.metrics?.gap_range) {
this.ruleMonitoring.getLastRunMetricsSetters().setLastRunMetricsGapRange(null);
}
(async () => {
try {
await clearExpiredSnoozes({
@ -608,6 +610,13 @@ export class TaskRunner<
runDate: this.runDate,
});
const gap = this.ruleMonitoring.getMonitoring()?.run?.last_run?.metrics?.gap_range;
if (gap) {
this.alertingEventLogger.reportGap({
gap,
});
}
if (!this.cancelled) {
this.inMemoryMetrics.increment(IN_MEMORY_METRICS.RULE_EXECUTIONS);
if (outcome === 'failure') {

View file

@ -400,6 +400,7 @@ export interface PublicMetricsSetters {
setLastRunMetricsTotalAlertsDetected: (totalAlertDetected: number) => void;
setLastRunMetricsTotalAlertsCreated: (totalAlertCreated: number) => void;
setLastRunMetricsGapDurationS: (gapDurationS: number) => void;
setLastRunMetricsGapRange: (gapRange: { lte: string; gte: string } | null) => void;
}
export interface PublicLastRunSetters {

View file

@ -166,7 +166,11 @@ Below is a document in the expected structure, with descriptions of the fields:
total_search_duration_ms: "total time spent performing ES searches as measured by Kibana; includes network latency and time spent serializing/deserializing request/response",
total_indexing_duration_ms: "total time spent indexing documents during current rule execution cycle",
total_enrichment_duration_ms: "total time spent enriching documents during current rule execution cycle",
execution_gap_duration_s: "duration in seconds of execution gap"
execution_gap_duration_s: "duration in seconds of execution gap",
gap_range: {
gte: "Gap range start date",
lte: "Gap range end date",
}
}
}
}

View file

@ -325,6 +325,51 @@
"type": "keyword",
"ignore_above": 1024
},
"gap": {
"properties": {
"status": {
"type": "keyword",
"ignore_above": 1024
},
"range": {
"type": "date_range",
"format": "strict_date_optional_time||epoch_millis"
},
"filled_intervals": {
"type": "date_range",
"format": "strict_date_optional_time||epoch_millis",
"meta": {
"isArray": "true"
}
},
"unfilled_intervals": {
"format": "strict_date_optional_time||epoch_millis",
"type": "date_range",
"meta": {
"isArray": "true"
}
},
"in_progress_intervals": {
"format": "strict_date_optional_time||epoch_millis",
"type": "date_range",
"meta": {
"isArray": "true"
}
},
"total_gap_duration_ms": {
"type": "long"
},
"filled_duration_ms": {
"type": "long"
},
"unfilled_duration_ms": {
"type": "long"
},
"in_progress_duration_ms": {
"type": "long"
}
}
},
"execution": {
"properties": {
"uuid": {
@ -392,6 +437,10 @@
"execution_gap_duration_s": {
"type": "long"
},
"gap_range": {
"type": "date_range",
"format": "strict_date_optional_time||epoch_millis"
},
"rule_type_run_duration_ms": {
"type": "long"
},

View file

@ -146,6 +146,19 @@ export const EventSchema = schema.maybe(
rule: schema.maybe(
schema.object({
consumer: ecsString(),
gap: schema.maybe(
schema.object({
status: ecsString(),
range: ecsDateRange(),
filled_intervals: ecsDateRangeMulti(),
unfilled_intervals: ecsDateRangeMulti(),
in_progress_intervals: ecsDateRangeMulti(),
total_gap_duration_ms: ecsStringOrNumber(),
filled_duration_ms: ecsStringOrNumber(),
unfilled_duration_ms: ecsStringOrNumber(),
in_progress_duration_ms: ecsStringOrNumber(),
})
),
execution: schema.maybe(
schema.object({
uuid: ecsString(),
@ -175,6 +188,7 @@ export const EventSchema = schema.maybe(
es_search_duration_ms: ecsStringOrNumber(),
total_search_duration_ms: ecsStringOrNumber(),
execution_gap_duration_s: ecsStringOrNumber(),
gap_range: ecsDateRange(),
rule_type_run_duration_ms: ecsStringOrNumber(),
process_alerts_duration_ms: ecsStringOrNumber(),
trigger_actions_duration_ms: ecsStringOrNumber(),
@ -272,6 +286,18 @@ function ecsBoolean() {
return schema.maybe(schema.boolean());
}
function ecsDateRangeBase() {
return schema.object({ gte: ecsDate(), lte: ecsDate() });
}
function ecsDateRange() {
return schema.maybe(ecsDateRangeBase());
}
function ecsDateRangeMulti() {
return schema.maybe(schema.arrayOf(ecsDateRangeBase()));
}
const ISO_DATE_PATTERN = /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z$/;
function validateDate(isoDate: string) {

View file

@ -166,6 +166,15 @@ function generateSchemaLines(lineWriter, prop, mappings) {
return;
}
if (mappings.type === 'date_range') {
if (mappings.meta && mappings.meta.isArray === 'true') {
lineWriter.addLine(`${propKey}: ecsDateRangeMulti(),`);
} else {
lineWriter.addLine(`${propKey}: ecsDateRange(),`);
}
return;
}
// only handling objects for the rest of this function
if (mappings.properties == null) {
logError(`unknown properties to map: ${prop}: ${JSON.stringify(mappings)}`);
@ -333,6 +342,18 @@ function ecsBoolean() {
return schema.maybe(schema.boolean());
}
function ecsDateRangeBase() {
return schema.object({ gte: ecsDate(), lte: ecsDate() });
}
function ecsDateRange() {
return schema.maybe(ecsDateRangeBase());
}
function ecsDateRangeMulti() {
return schema.maybe(schema.arrayOf(ecsDateRangeBase()));
}
const ISO_DATE_PATTERN = /^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d{3}Z$/;
function validateDate(isoDate: string) {

View file

@ -100,6 +100,42 @@ exports.EcsCustomPropertyMappings = {
type: 'keyword',
ignore_above: 1024,
},
gap: {
properties: {
status: {
type: 'keyword',
ignore_above: 1024,
},
range: {
type: 'date_range',
format: 'strict_date_optional_time||epoch_millis',
},
filled_intervals: {
type: 'date_range',
format: 'strict_date_optional_time||epoch_millis',
},
unfilled_intervals: {
format: 'strict_date_optional_time||epoch_millis',
type: 'date_range',
},
in_progress_intervals: {
format: 'strict_date_optional_time||epoch_millis',
type: 'date_range',
},
total_gap_duration_ms: {
type: 'long',
},
filled_duration_ms: {
type: 'long',
},
unfilled_duration_ms: {
type: 'long',
},
in_progress_duration_ms: {
type: 'long',
},
},
},
execution: {
properties: {
uuid: {
@ -167,6 +203,10 @@ exports.EcsCustomPropertyMappings = {
execution_gap_duration_s: {
type: 'long',
},
gap_range: {
type: 'date_range',
format: 'strict_date_optional_time||epoch_millis',
},
rule_type_run_duration_ms: {
type: 'long',
},
@ -342,4 +382,7 @@ exports.EcsEventLogMultiValuedProperties = [
'rule.author',
'kibana.space_ids',
'kibana.alert.maintenance_window_ids',
'kibana.alert.rule.gap.in_progress_intervals',
'kibana.alert.rule.gap.filled_intervals',
'kibana.alert.rule.gap.unfilled_intervals',
];

View file

@ -34,4 +34,19 @@ export const RuleExecutionMetrics = z.object({
* Duration in seconds of execution gap
*/
execution_gap_duration_s: z.number().int().min(0).optional(),
/**
* Range of the execution gap
*/
gap_range: z
.object({
/**
* Start date of the execution gap
*/
gte: z.string(),
/**
* End date of the execution gap
*/
lte: z.string(),
})
.optional(),
});

View file

@ -25,3 +25,17 @@ components:
description: Duration in seconds of execution gap
type: integer
minimum: 0
gap_range:
description: Range of the execution gap
type: object
required:
- gte
- lte
properties:
gte:
description: Start date of the execution gap
type: string
lte:
description: End date of the execution gap
type: string

View file

@ -215,6 +215,15 @@ export const allowedExperimentalValues = Object.freeze({
*/
valueListItemsModalEnabled: true,
/**
* Enables the storing of gaps in the event log
*/
manualRuleRunEnabled: false,
/**
* Enables the storing of gaps in the event log
*/
storeGapsInEventLogEnabled: false,
/**
* Adds a new option to filter descendants of a process for Management / Event Filters
*/

View file

@ -5134,6 +5134,19 @@ components:
description: Duration in seconds of execution gap
minimum: 0
type: integer
gap_range:
description: Range of the execution gap
type: object
properties:
gte:
description: Start date of the execution gap
type: string
lte:
description: End date of the execution gap
type: string
required:
- gte
- lte
total_enrichment_duration_ms:
description: >-
Total time spent enriching documents during current rule execution

View file

@ -4283,6 +4283,19 @@ components:
description: Duration in seconds of execution gap
minimum: 0
type: integer
gap_range:
description: Range of the execution gap
type: object
properties:
gte:
description: Start date of the execution gap
type: string
lte:
description: End date of the execution gap
type: string
required:
- gte
- lte
total_enrichment_duration_ms:
description: >-
Total time spent enriching documents during current rule execution

View file

@ -174,6 +174,7 @@ export const createRuleExecutionLogClientForExecutors = (
total_search_duration_ms: totalSearchDurationMs,
total_indexing_duration_ms: totalIndexingDurationMs,
execution_gap_duration_s: executionGapDurationS,
gap_range: gapRange,
} = metrics ?? {};
if (totalSearchDurationMs) {
@ -188,6 +189,10 @@ export const createRuleExecutionLogClientForExecutors = (
ruleMonitoringService.setLastRunMetricsGapDurationS(executionGapDurationS);
}
if (gapRange) {
ruleMonitoringService.setLastRunMetricsGapRange(gapRange);
}
if (newStatus === RuleExecutionStatusEnum.failed) {
ruleResultService.addLastRunError(message, userError ?? false);
} else if (newStatus === RuleExecutionStatusEnum['partial failure']) {
@ -254,6 +259,7 @@ const normalizeStatusChangeArgs = (args: StatusChangeArgs): NormalizedStatusChan
total_indexing_duration_ms: normalizeDurations(metrics.indexingDurations),
total_enrichment_duration_ms: normalizeDurations(metrics.enrichmentDurations),
execution_gap_duration_s: normalizeGap(metrics.executionGap),
gap_range: metrics.gapRange ?? undefined,
}
: undefined,
userError,

View file

@ -130,4 +130,5 @@ export interface MetricsArgs {
indexingDurations?: string[];
enrichmentDurations?: string[];
executionGap?: Duration;
gapRange?: { gte: string; lte: string };
}

View file

@ -308,6 +308,7 @@ export const createSecurityRuleTypeWrapper: CreateSecurityRuleTypeWrapper =
tuples,
remainingGap,
warningStatusMessage: rangeTuplesWarningMessage,
gap,
} = await getRuleRangeTuples({
startedAt,
previousStartedAt,
@ -329,7 +330,10 @@ export const createSecurityRuleTypeWrapper: CreateSecurityRuleTypeWrapper =
await ruleExecutionLogger.logStatusChange({
newStatus: RuleExecutionStatusEnum.failed,
message: gapErrorMessage,
metrics: { executionGap: remainingGap },
metrics: {
executionGap: remainingGap,
gapRange: experimentalFeatures?.storeGapsInEventLogEnabled ? gap : undefined,
},
});
}
@ -508,6 +512,7 @@ export const createSecurityRuleTypeWrapper: CreateSecurityRuleTypeWrapper =
indexingDurations: result.bulkCreateTimes,
enrichmentDurations: result.enrichmentTimes,
executionGap: remainingGap,
gapRange: experimentalFeatures?.storeGapsInEventLogEnabled ? gap : undefined,
},
userError: result.userError,
});

View file

@ -492,9 +492,11 @@ describe('utils', () => {
});
test('should return two tuples if gap and previouslyStartedAt', async () => {
const { tuples, remainingGap, warningStatusMessage } = await getRuleRangeTuples({
previousStartedAt: moment().subtract(65, 's').toDate(),
startedAt: moment().toDate(),
const previouslyStartedAt = moment().subtract(65, 's').toDate();
const startedAt = moment().toDate();
const { tuples, remainingGap, warningStatusMessage, gap } = await getRuleRangeTuples({
previousStartedAt: previouslyStartedAt,
startedAt,
interval: '50s',
from: 'now-55s',
to: 'now',
@ -506,12 +508,15 @@ describe('utils', () => {
expect(moment(someTuple.to).diff(moment(someTuple.from), 's')).toEqual(55);
expect(remainingGap.asMilliseconds()).toEqual(0);
expect(warningStatusMessage).toEqual(undefined);
expect(gap).toEqual(undefined);
});
test('should return five tuples when give long gap', async () => {
const { tuples, remainingGap, warningStatusMessage } = await getRuleRangeTuples({
previousStartedAt: moment().subtract(65, 's').toDate(), // 64 is 5 times the interval + lookback, which will trigger max lookback
startedAt: moment().toDate(),
const previousStartedAt = moment().subtract(65, 's').toDate(); // 64 is 5 times the interval + lookback, which will trigger max lookback
const startedAt = moment().toDate();
const { tuples, remainingGap, warningStatusMessage, gap } = await getRuleRangeTuples({
previousStartedAt,
startedAt,
interval: '10s',
from: 'now-13s',
to: 'now',
@ -530,6 +535,8 @@ describe('utils', () => {
});
expect(remainingGap.asMilliseconds()).toEqual(12000);
expect(warningStatusMessage).toEqual(undefined);
expect(gap?.gte).toEqual(previousStartedAt.toISOString());
expect(gap?.lte).toEqual(moment(previousStartedAt).add(remainingGap, 'ms').toISOString());
});
test('should return a single tuple when give a negative gap (rule ran sooner than expected)', async () => {

View file

@ -517,10 +517,19 @@ export const getRuleRangeTuples = async ({
0
);
let gapRange;
if (remainingGapMilliseconds > 0 && previousStartedAt != null) {
gapRange = {
gte: previousStartedAt.toISOString(),
lte: moment(previousStartedAt).add(remainingGapMilliseconds).toDate().toISOString(),
};
}
return {
tuples: tuples.reverse(),
remainingGap: moment.duration(remainingGapMilliseconds),
warningStatusMessage,
gap: gapRange,
};
};