mirror of
https://github.com/elastic/kibana.git
synced 2025-04-24 01:38:56 -04:00
[Alerting] Remove defaultRuleTaskTimeout and set ruleType specific timeout from kibana.yml (#128294)
Read ruleTaskTimeout from kibana.yml
This commit is contained in:
parent
ca81ce8707
commit
c9aad65b67
10 changed files with 146 additions and 27 deletions
|
@ -185,13 +185,6 @@ For example, `20m`, `24h`, `7d`, `1w`. Default: `60s`.
|
|||
`xpack.alerting.maxEphemeralActionsPerAlert`::
|
||||
Sets the number of actions that will be executed ephemerally. To use this, enable ephemeral tasks in task manager first with <<task-manager-settings,`xpack.task_manager.ephemeral_tasks.enabled`>>
|
||||
|
||||
`xpack.alerting.defaultRuleTaskTimeout`::
|
||||
Specifies the default timeout for the all rule types tasks. The time is formatted as:
|
||||
+
|
||||
`<count>[ms,s,m,h,d,w,M,Y]`
|
||||
+
|
||||
For example, `20m`, `24h`, `7d`, `1w`. Default: `5m`.
|
||||
|
||||
`xpack.alerting.cancelAlertsOnRuleTimeout`::
|
||||
Specifies whether to skip writing alerts and scheduling actions if rule execution is cancelled due to timeout. Default: `true`. This setting can be overridden by individual rule types.
|
||||
|
||||
|
@ -207,3 +200,22 @@ Specifies the behavior when a new or changed rule has a schedule interval less t
|
|||
|
||||
`xpack.alerting.rules.execution.actions.max`::
|
||||
Specifies the maximum number of actions that a rule can trigger each time detection checks run.
|
||||
|
||||
`xpack.alerting.rules.execution.timeout`::
|
||||
Specifies the default timeout for tasks associated with all types of rules. The time is formatted as:
|
||||
+
|
||||
`<count>[ms,s,m,h,d,w,M,Y]`
|
||||
+
|
||||
For example, `20m`, `24h`, `7d`, `1w`. Default: `5m`.
|
||||
|
||||
`xpack.alerting.rules.execution.ruleTypeOverrides`::
|
||||
Overrides the configs under `xpack.alerting.rules.execution` for the rule type with the given ID. List the rule identifier and its settings in an array of objects.
|
||||
+
|
||||
For example:
|
||||
```
|
||||
xpack.alerting.rules.execution:
|
||||
timeout: '5m'
|
||||
ruleTypeOverrides:
|
||||
- id: '.index-threshold'
|
||||
timeout: '15m'
|
||||
```
|
|
@ -199,6 +199,8 @@ kibana_vars=(
|
|||
xpack.alerting.invalidateApiKeysTask.interval
|
||||
xpack.alerting.invalidateApiKeysTask.removalDelay
|
||||
xpack.alerting.defaultRuleTaskTimeout
|
||||
xpack.alerting.rules.execution.timeout
|
||||
xpack.alerting.rules.execution.ruleTypeOverrides
|
||||
xpack.alerting.cancelAlertsOnRuleTimeout
|
||||
xpack.alerting.rules.minimumScheduleInterval.value
|
||||
xpack.alerting.rules.minimumScheduleInterval.enforce
|
||||
|
|
|
@ -13,7 +13,6 @@ describe('config validation', () => {
|
|||
expect(configSchema.validate(config)).toMatchInlineSnapshot(`
|
||||
Object {
|
||||
"cancelAlertsOnRuleTimeout": true,
|
||||
"defaultRuleTaskTimeout": "5m",
|
||||
"healthCheck": Object {
|
||||
"interval": "60m",
|
||||
},
|
||||
|
|
|
@ -42,7 +42,6 @@ export const configSchema = schema.object({
|
|||
maxEphemeralActionsPerAlert: schema.number({
|
||||
defaultValue: DEFAULT_MAX_EPHEMERAL_ACTIONS_PER_ALERT,
|
||||
}),
|
||||
defaultRuleTaskTimeout: schema.string({ validate: validateDurationSchema, defaultValue: '5m' }),
|
||||
cancelAlertsOnRuleTimeout: schema.boolean({ defaultValue: true }),
|
||||
rules: rulesSchema,
|
||||
});
|
||||
|
|
|
@ -60,5 +60,12 @@ export const config: PluginConfigDescriptor<AlertsConfigType> = {
|
|||
'xpack.alerting.invalidateApiKeysTask.removalDelay',
|
||||
{ level: 'warning' }
|
||||
),
|
||||
renameFromRoot(
|
||||
'xpack.alerting.defaultRuleTaskTimeout',
|
||||
'xpack.alerting.rules.execution.timeout',
|
||||
{
|
||||
level: 'warning',
|
||||
}
|
||||
),
|
||||
],
|
||||
};
|
||||
|
|
|
@ -0,0 +1,61 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import { getRuleTaskTimeout } from './get_rule_task_timeout';
|
||||
import { RulesConfig } from '../config';
|
||||
|
||||
const ruleTypeId = 'test-rule-type-id';
|
||||
const config = {
|
||||
minimumScheduleInterval: {
|
||||
value: '2m',
|
||||
enforce: false,
|
||||
},
|
||||
execution: {
|
||||
timeout: '1m',
|
||||
actions: { max: 1000 },
|
||||
},
|
||||
} as RulesConfig;
|
||||
|
||||
const configWithRuleType = {
|
||||
...config,
|
||||
execution: {
|
||||
...config.execution,
|
||||
ruleTypeOverrides: [
|
||||
{
|
||||
id: ruleTypeId,
|
||||
timeout: '10m',
|
||||
},
|
||||
],
|
||||
},
|
||||
};
|
||||
|
||||
const configWithoutTimeout = {
|
||||
...config,
|
||||
execution: {
|
||||
actions: { max: 1000 },
|
||||
},
|
||||
};
|
||||
|
||||
describe('get rule task timeout', () => {
|
||||
test('returns the rule type specific timeout', () => {
|
||||
expect(getRuleTaskTimeout({ config: configWithRuleType, ruleTypeId })).toBe('10m');
|
||||
});
|
||||
|
||||
test('returns the timeout that applies all the rule types', () => {
|
||||
expect(getRuleTaskTimeout({ config, ruleTypeId })).toBe('1m');
|
||||
});
|
||||
|
||||
test('returns the timeout passed by the plugin', () => {
|
||||
expect(
|
||||
getRuleTaskTimeout({ config: configWithoutTimeout, ruleTaskTimeout: '20m', ruleTypeId })
|
||||
).toBe('20m');
|
||||
});
|
||||
|
||||
test('returns the default timeout', () => {
|
||||
expect(getRuleTaskTimeout({ config: configWithoutTimeout, ruleTypeId })).toBe('5m');
|
||||
});
|
||||
});
|
35
x-pack/plugins/alerting/server/lib/get_rule_task_timeout.ts
Normal file
35
x-pack/plugins/alerting/server/lib/get_rule_task_timeout.ts
Normal file
|
@ -0,0 +1,35 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import { RulesConfig } from '../config';
|
||||
|
||||
const DEFAULT_EXECUTION_TIMEOUT = '5m';
|
||||
|
||||
export const getRuleTaskTimeout = ({
|
||||
config,
|
||||
ruleTaskTimeout,
|
||||
ruleTypeId,
|
||||
}: {
|
||||
config: RulesConfig;
|
||||
ruleTaskTimeout?: string;
|
||||
ruleTypeId: string;
|
||||
}): string => {
|
||||
const ruleTypeConfig = config.execution.ruleTypeOverrides?.find(
|
||||
(ruleType) => ruleTypeId === ruleType.id
|
||||
);
|
||||
|
||||
// First, rule type specific timeout config (ruleTypeOverrides) is applied if it's set in kibana.yml
|
||||
// if not, then timeout for all the rule types is applied if it's set in kibana.yml
|
||||
// if not, ruleTaskTimeout is applied that is passed from the rule type registering plugin
|
||||
// if none of above is set, DEFAULT_EXECUTION_TIMEOUT is applied
|
||||
return (
|
||||
ruleTypeConfig?.timeout ||
|
||||
config.execution.timeout ||
|
||||
ruleTaskTimeout ||
|
||||
DEFAULT_EXECUTION_TIMEOUT
|
||||
);
|
||||
};
|
|
@ -30,7 +30,6 @@ const generateAlertingConfig = (): AlertingConfig => ({
|
|||
removalDelay: '1h',
|
||||
},
|
||||
maxEphemeralActionsPerAlert: 10,
|
||||
defaultRuleTaskTimeout: '5m',
|
||||
cancelAlertsOnRuleTimeout: true,
|
||||
rules: {
|
||||
minimumScheduleInterval: { value: '1m', enforce: false },
|
||||
|
@ -180,6 +179,10 @@ describe('Alerting Plugin', () => {
|
|||
describe('registerType()', () => {
|
||||
let setup: PluginSetupContract;
|
||||
beforeEach(async () => {
|
||||
const context = coreMock.createPluginInitializerContext<AlertingConfig>(
|
||||
generateAlertingConfig()
|
||||
);
|
||||
plugin = new AlertingPlugin(context);
|
||||
setup = await plugin.setup(setupMocks, mockPlugins);
|
||||
});
|
||||
|
||||
|
|
|
@ -66,6 +66,7 @@ import { getSecurityHealth, SecurityHealth } from './lib/get_security_health';
|
|||
import { MonitoringCollectionSetup } from '../../monitoring_collection/server';
|
||||
import { registerNodeCollector, registerClusterCollector, InMemoryMetrics } from './monitoring';
|
||||
import { getExecutionConfigForRuleType } from './lib/get_rules_config';
|
||||
import { getRuleTaskTimeout } from './lib/get_rule_task_timeout';
|
||||
|
||||
export const EVENT_LOG_PROVIDER = 'alerting';
|
||||
export const EVENT_LOG_ACTIONS = {
|
||||
|
@ -282,15 +283,13 @@ export class AlertingPlugin {
|
|||
encryptedSavedObjects: plugins.encryptedSavedObjects,
|
||||
});
|
||||
|
||||
const alertingConfig: AlertingConfig = this.config;
|
||||
|
||||
return {
|
||||
registerType<
|
||||
Params extends AlertTypeParams = AlertTypeParams,
|
||||
ExtractedParams extends AlertTypeParams = AlertTypeParams,
|
||||
State extends AlertTypeState = AlertTypeState,
|
||||
InstanceState extends AlertInstanceState = AlertInstanceState,
|
||||
InstanceContext extends AlertInstanceContext = AlertInstanceContext,
|
||||
registerType: <
|
||||
Params extends AlertTypeParams = never,
|
||||
ExtractedParams extends AlertTypeParams = never,
|
||||
State extends AlertTypeState = never,
|
||||
InstanceState extends AlertInstanceState = never,
|
||||
InstanceContext extends AlertInstanceContext = never,
|
||||
ActionGroupIds extends string = never,
|
||||
RecoveryActionGroupId extends string = never
|
||||
>(
|
||||
|
@ -303,18 +302,21 @@ export class AlertingPlugin {
|
|||
ActionGroupIds,
|
||||
RecoveryActionGroupId
|
||||
>
|
||||
) {
|
||||
) => {
|
||||
if (!(ruleType.minimumLicenseRequired in LICENSE_TYPE)) {
|
||||
throw new Error(`"${ruleType.minimumLicenseRequired}" is not a valid license type`);
|
||||
}
|
||||
ruleType.config = getExecutionConfigForRuleType({
|
||||
config: alertingConfig.rules,
|
||||
config: this.config.rules,
|
||||
ruleTypeId: ruleType.id,
|
||||
});
|
||||
ruleType.ruleTaskTimeout = getRuleTaskTimeout({
|
||||
config: this.config.rules,
|
||||
ruleTaskTimeout: ruleType.ruleTaskTimeout,
|
||||
ruleTypeId: ruleType.id,
|
||||
});
|
||||
ruleType.ruleTaskTimeout =
|
||||
ruleType.ruleTaskTimeout ?? alertingConfig.defaultRuleTaskTimeout;
|
||||
ruleType.cancelAlertsOnRuleTimeout =
|
||||
ruleType.cancelAlertsOnRuleTimeout ?? alertingConfig.cancelAlertsOnRuleTimeout;
|
||||
ruleType.cancelAlertsOnRuleTimeout ?? this.config.cancelAlertsOnRuleTimeout;
|
||||
ruleType.doesSetRecoveryContext = ruleType.doesSetRecoveryContext ?? false;
|
||||
ruleTypeRegistry.register(ruleType);
|
||||
},
|
||||
|
@ -329,7 +331,7 @@ export class AlertingPlugin {
|
|||
);
|
||||
},
|
||||
getConfig: () => {
|
||||
return pick(alertingConfig.rules, 'minimumScheduleInterval');
|
||||
return pick(this.config.rules, 'minimumScheduleInterval');
|
||||
},
|
||||
};
|
||||
}
|
||||
|
|
|
@ -132,6 +132,8 @@ export function createExecutionHandler<
|
|||
continue;
|
||||
}
|
||||
|
||||
alertExecutionStore.numberOfTriggeredActions++;
|
||||
|
||||
const namespace = spaceId === 'default' ? {} : { namespace: spaceId };
|
||||
|
||||
const enqueueOptions = {
|
||||
|
@ -165,12 +167,9 @@ export function createExecutionHandler<
|
|||
if (isEphemeralTaskRejectedDueToCapacityError(err)) {
|
||||
await actionsClient.enqueueExecution(enqueueOptions);
|
||||
}
|
||||
} finally {
|
||||
alertExecutionStore.numberOfTriggeredActions++;
|
||||
}
|
||||
} else {
|
||||
await actionsClient.enqueueExecution(enqueueOptions);
|
||||
alertExecutionStore.numberOfTriggeredActions++;
|
||||
}
|
||||
|
||||
const event = createAlertEventLogRecordObject({
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue