mirror of
https://github.com/elastic/kibana.git
synced 2025-04-23 01:13:23 -04:00
[8.15] [Response Ops][Alerting] Deleting ad hoc run task if the last schedule entry ends in a timeout (#187496) (#187810)
# Backport This will backport the following commits from `main` to `8.15`: - [[Response Ops][Alerting] Deleting ad hoc run task if the last schedule entry ends in a timeout (#187496)](https://github.com/elastic/kibana/pull/187496) <!--- Backport version: 9.4.3 --> ### Questions ? Please refer to the [Backport tool documentation](https://github.com/sqren/backport) <!--BACKPORT [{"author":{"name":"Ying Mao","email":"ying.mao@elastic.co"},"sourceCommit":{"committedDate":"2024-07-09T00:07:57Z","message":"[Response Ops][Alerting] Deleting ad hoc run task if the last schedule entry ends in a timeout (#187496)\n\n## Summary\r\n\r\nFixing bug where ad hoc run task was not getting deleted if running the\r\nlast schedule entry and the run times out.\r\n\r\n## To Verify\r\n1. Create a detection rule\r\n2. Add a delay in the ad hoc task runner\r\n```\r\n--- a/x-pack/plugins/alerting/server/task_runner/ad_hoc_task_runner.ts\r\n+++ b/x-pack/plugins/alerting/server/task_runner/ad_hoc_task_runner.ts\r\n@@ -530,6 +530,7 @@ export class AdHocTaskRunner {\r\n } catch (err) {\r\n runMetrics = asErr(err);\r\n }\r\n+ await new Promise((resolve) => setTimeout(resolve, 3100000));\r\n await this.processAdHocRunResults(runMetrics);\r\n```\r\n3. Schedule a backfill for the rule with only one schedule entry\r\n\r\n```\r\nPOST https://localhost:5601/internal/alerting/rules/backfill/_schedule\r\n[\r\n {\r\n \"rule_id\": <ruleId>,\r\n \"start\": \"2024-07-03T13:05:00.000Z\"\r\n }\r\n]\r\n```\r\n\r\n4. Wait for the run to get cancelled then verify that the task was\r\ndeleted.\r\n\r\n---------\r\n\r\nCo-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com>","sha":"c2be810de37479d43661e722e7d7454dacc35c79","branchLabelMapping":{"^v8.16.0$":"main","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["Feature:Alerting","release_note:skip","Team:ResponseOps","v8.15.0","v8.16.0"],"title":"[Response Ops][Alerting] Deleting ad hoc run task if the last schedule entry ends in a timeout","number":187496,"url":"https://github.com/elastic/kibana/pull/187496","mergeCommit":{"message":"[Response Ops][Alerting] Deleting ad hoc run task if the last schedule entry ends in a timeout (#187496)\n\n## Summary\r\n\r\nFixing bug where ad hoc run task was not getting deleted if running the\r\nlast schedule entry and the run times out.\r\n\r\n## To Verify\r\n1. Create a detection rule\r\n2. Add a delay in the ad hoc task runner\r\n```\r\n--- a/x-pack/plugins/alerting/server/task_runner/ad_hoc_task_runner.ts\r\n+++ b/x-pack/plugins/alerting/server/task_runner/ad_hoc_task_runner.ts\r\n@@ -530,6 +530,7 @@ export class AdHocTaskRunner {\r\n } catch (err) {\r\n runMetrics = asErr(err);\r\n }\r\n+ await new Promise((resolve) => setTimeout(resolve, 3100000));\r\n await this.processAdHocRunResults(runMetrics);\r\n```\r\n3. Schedule a backfill for the rule with only one schedule entry\r\n\r\n```\r\nPOST https://localhost:5601/internal/alerting/rules/backfill/_schedule\r\n[\r\n {\r\n \"rule_id\": <ruleId>,\r\n \"start\": \"2024-07-03T13:05:00.000Z\"\r\n }\r\n]\r\n```\r\n\r\n4. Wait for the run to get cancelled then verify that the task was\r\ndeleted.\r\n\r\n---------\r\n\r\nCo-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com>","sha":"c2be810de37479d43661e722e7d7454dacc35c79"}},"sourceBranch":"main","suggestedTargetBranches":["8.15"],"targetPullRequestStates":[{"branch":"8.15","label":"v8.15.0","branchLabelMappingKey":"^v(\\d+).(\\d+).\\d+$","isSourceBranch":false,"state":"NOT_CREATED"},{"branch":"main","label":"v8.16.0","branchLabelMappingKey":"^v8.16.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/187496","number":187496,"mergeCommit":{"message":"[Response Ops][Alerting] Deleting ad hoc run task if the last schedule entry ends in a timeout (#187496)\n\n## Summary\r\n\r\nFixing bug where ad hoc run task was not getting deleted if running the\r\nlast schedule entry and the run times out.\r\n\r\n## To Verify\r\n1. Create a detection rule\r\n2. Add a delay in the ad hoc task runner\r\n```\r\n--- a/x-pack/plugins/alerting/server/task_runner/ad_hoc_task_runner.ts\r\n+++ b/x-pack/plugins/alerting/server/task_runner/ad_hoc_task_runner.ts\r\n@@ -530,6 +530,7 @@ export class AdHocTaskRunner {\r\n } catch (err) {\r\n runMetrics = asErr(err);\r\n }\r\n+ await new Promise((resolve) => setTimeout(resolve, 3100000));\r\n await this.processAdHocRunResults(runMetrics);\r\n```\r\n3. Schedule a backfill for the rule with only one schedule entry\r\n\r\n```\r\nPOST https://localhost:5601/internal/alerting/rules/backfill/_schedule\r\n[\r\n {\r\n \"rule_id\": <ruleId>,\r\n \"start\": \"2024-07-03T13:05:00.000Z\"\r\n }\r\n]\r\n```\r\n\r\n4. Wait for the run to get cancelled then verify that the task was\r\ndeleted.\r\n\r\n---------\r\n\r\nCo-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com>","sha":"c2be810de37479d43661e722e7d7454dacc35c79"}}]}] BACKPORT--> Co-authored-by: Ying Mao <ying.mao@elastic.co>
This commit is contained in:
parent
056d64a179
commit
bcdf5e98e3
2 changed files with 84 additions and 2 deletions
|
@ -1244,6 +1244,84 @@ describe('Ad Hoc Task Runner', () => {
|
|||
expect(logger.error).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test('should handle task cancellation signal due to timeout when for last schedule entry', async () => {
|
||||
encryptedSavedObjectsClient.getDecryptedAsInternalUser.mockResolvedValue({
|
||||
...mockedAdHocRunSO,
|
||||
attributes: {
|
||||
...mockedAdHocRunSO.attributes,
|
||||
schedule: [{ ...schedule1, status: adHocRunStatus.COMPLETE }, schedule2],
|
||||
},
|
||||
});
|
||||
const taskRunner = new AdHocTaskRunner({
|
||||
context: taskRunnerFactoryInitializerParams,
|
||||
internalSavedObjectsRepository,
|
||||
taskInstance: mockedTaskInstance,
|
||||
});
|
||||
|
||||
const promise = taskRunner.run();
|
||||
await Promise.resolve();
|
||||
await taskRunner.cancel();
|
||||
await promise;
|
||||
await taskRunner.cleanup();
|
||||
|
||||
expect(encryptedSavedObjectsClient.getDecryptedAsInternalUser).toHaveBeenCalledWith(
|
||||
AD_HOC_RUN_SAVED_OBJECT_TYPE,
|
||||
'abc',
|
||||
{}
|
||||
);
|
||||
expect(ruleTypeRegistry.get).toHaveBeenCalledWith('siem.queryRule');
|
||||
expect(ruleTypeRegistry.ensureRuleTypeEnabled).toHaveBeenCalledWith('siem.queryRule');
|
||||
expect(mockValidateRuleTypeParams).toHaveBeenCalledWith(
|
||||
mockedAdHocRunSO.attributes.rule.params,
|
||||
ruleTypeWithAlerts.validate.params
|
||||
);
|
||||
|
||||
// @ts-ignore - accessing private variable
|
||||
// should run the first entry in the schedule
|
||||
expect(taskRunner.scheduleToRunIndex).toEqual(1);
|
||||
expect(RuleRunMetricsStore).toHaveBeenCalledTimes(1);
|
||||
expect(ruleTypeWithAlerts.executor).toHaveBeenCalledTimes(1);
|
||||
|
||||
expect(internalSavedObjectsRepository.update).toHaveBeenCalledWith(
|
||||
AD_HOC_RUN_SAVED_OBJECT_TYPE,
|
||||
mockedAdHocRunSO.id,
|
||||
{
|
||||
schedule: [
|
||||
{ ...schedule1, status: adHocRunStatus.COMPLETE },
|
||||
{ ...schedule2, status: adHocRunStatus.TIMEOUT },
|
||||
],
|
||||
},
|
||||
{ namespace: undefined, refresh: false }
|
||||
);
|
||||
|
||||
expect(internalSavedObjectsRepository.delete).toHaveBeenCalledWith(
|
||||
AD_HOC_RUN_SAVED_OBJECT_TYPE,
|
||||
mockedAdHocRunSO.id,
|
||||
{ namespace: undefined, refresh: false }
|
||||
);
|
||||
|
||||
testAlertingEventLogCalls({
|
||||
status: 'ok',
|
||||
timeout: true,
|
||||
backfillRunAt: schedule2.runAt,
|
||||
backfillInterval: schedule2.interval,
|
||||
});
|
||||
expect(logger.debug).toHaveBeenCalledTimes(3);
|
||||
expect(logger.debug).nthCalledWith(
|
||||
1,
|
||||
`Executing ad hoc run for rule test:rule-id for runAt ${schedule2.runAt}`
|
||||
);
|
||||
expect(logger.debug).nthCalledWith(
|
||||
2,
|
||||
`Cancelling execution for ad hoc run with id abc for rule type test with id rule-id - execution exceeded rule type timeout of 3m`
|
||||
);
|
||||
expect(logger.debug).nthCalledWith(
|
||||
3,
|
||||
`Aborting any in-progress ES searches for rule type test with id rule-id`
|
||||
);
|
||||
expect(logger.error).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test('should handle task cancellation that leads to executor throwing error', async () => {
|
||||
ruleTypeWithAlerts.executor.mockImplementationOnce(() => {
|
||||
throw new Error('Search has been aborted due to cancelled execution');
|
||||
|
|
|
@ -20,7 +20,7 @@ import {
|
|||
TaskErrorSource,
|
||||
} from '@kbn/task-manager-plugin/server';
|
||||
import { nanosToMillis } from '@kbn/event-log-plugin/common';
|
||||
import { RunResult } from '@kbn/task-manager-plugin/server/task';
|
||||
import { CancellableTask, RunResult } from '@kbn/task-manager-plugin/server/task';
|
||||
import { AdHocRunStatus, adHocRunStatus } from '../../common/constants';
|
||||
import { RuleRunnerErrorStackTraceLog, RuleTaskStateAndMetrics, TaskRunnerContext } from './types';
|
||||
import { getExecutorServices } from './get_executor_services';
|
||||
|
@ -66,7 +66,7 @@ interface RunParams {
|
|||
validatedParams: RuleTypeParams;
|
||||
}
|
||||
|
||||
export class AdHocTaskRunner {
|
||||
export class AdHocTaskRunner implements CancellableTask {
|
||||
private readonly context: TaskRunnerContext;
|
||||
private readonly executionId: string;
|
||||
private readonly internalSavedObjectsRepository: ISavedObjectsRepository;
|
||||
|
@ -573,6 +573,10 @@ export class AdHocTaskRunner {
|
|||
: undefined,
|
||||
},
|
||||
});
|
||||
this.shouldDeleteTask = !this.hasAnyPendingRuns();
|
||||
|
||||
// cleanup function is not called for timed out tasks
|
||||
await this.cleanup();
|
||||
}
|
||||
|
||||
async cleanup() {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue