[ResponseOps][Actions] Improve Task Manager’s retry logic for ad-hoc tasks (#143860)

* Improving task manager retry logic

* Fixing functional tests

* Fixing logic
This commit is contained in:
doakalexi 2022-10-31 10:43:29 -04:00 committed by GitHub
parent ebb9dbdc1a
commit 6f1df849cb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 20 additions and 11 deletions

View file

@ -28,6 +28,7 @@ import apm from 'elastic-apm-node';
import { executionContextServiceMock } from '@kbn/core/server/mocks';
import { usageCountersServiceMock } from '@kbn/usage-collection-plugin/server/usage_counters/usage_counters_service.mock';
import {
calculateDelay,
TASK_MANAGER_RUN_TRANSACTION_TYPE,
TASK_MANAGER_TRANSACTION_TYPE,
TASK_MANAGER_TRANSACTION_TYPE_MARK_AS_RUNNING,
@ -300,9 +301,8 @@ describe('TaskManagerRunner', () => {
expect(instance.attempts).toEqual(initialAttempts + 1);
expect(instance.status).toBe('running');
expect(instance.startedAt!.getTime()).toEqual(Date.now());
expect(instance.retryAt!.getTime()).toEqual(
minutesFromNow((initialAttempts + 1) * 5).getTime() + timeoutMinutes * 60 * 1000
);
const expectedRunAt = Date.now() + calculateDelay(initialAttempts + 1);
expect(instance.retryAt!.getTime()).toEqual(expectedRunAt + timeoutMinutes * 60 * 1000);
expect(instance.enabled).not.toBeDefined();
});
@ -569,7 +569,7 @@ describe('TaskManagerRunner', () => {
sinon.assert.calledWith(getRetryStub, initialAttempts + 1);
const instance = store.update.mock.calls[0][0];
const attemptDelay = (initialAttempts + 1) * 5 * 60 * 1000;
const attemptDelay = calculateDelay(initialAttempts + 1);
const timeoutDelay = timeoutMinutes * 60 * 1000;
expect(instance.retryAt!.getTime()).toEqual(
new Date(Date.now() + attemptDelay + timeoutDelay).getTime()
@ -817,7 +817,8 @@ describe('TaskManagerRunner', () => {
const instance = store.update.mock.calls[0][0];
expect(instance.id).toEqual(id);
expect(instance.runAt.getTime()).toEqual(minutesFromNow(initialAttempts * 5).getTime());
const expectedRunAt = new Date(Date.now() + calculateDelay(initialAttempts));
expect(instance.runAt.getTime()).toEqual(expectedRunAt.getTime());
expect(instance.params).toEqual({ a: 'b' });
expect(instance.state).toEqual({ hey: 'there' });
expect(instance.enabled).not.toBeDefined();
@ -1169,7 +1170,7 @@ describe('TaskManagerRunner', () => {
sinon.assert.calledWith(getRetryStub, initialAttempts, error);
const instance = store.update.mock.calls[0][0];
const expectedRunAt = new Date(Date.now() + initialAttempts * 5 * 60 * 1000);
const expectedRunAt = new Date(Date.now() + calculateDelay(initialAttempts));
expect(instance.runAt.getTime()).toEqual(expectedRunAt.getTime());
expect(instance.enabled).not.toBeDefined();
});

View file

@ -53,8 +53,6 @@ import {
import { TaskTypeDictionary } from '../task_type_dictionary';
import { isUnrecoverableError } from './errors';
import type { EventLoopDelayConfig } from '../config';
const defaultBackoffPerFailure = 5 * 60 * 1000;
export const EMPTY_RUN_RESULT: SuccessfulRunResult = { state: {} };
export const TASK_MANAGER_RUN_TRANSACTION_TYPE = 'task-run';
@ -654,7 +652,7 @@ export class TaskManagerRunner implements TaskRunner {
if (retry instanceof Date) {
result = retry;
} else if (retry === true) {
result = new Date(Date.now() + attempts * defaultBackoffPerFailure);
result = new Date(Date.now() + calculateDelay(attempts));
}
// Add a duration to the result
@ -717,3 +715,13 @@ export function asRan(task: InstanceOf<TaskRunningStage.RAN, RanTask>): RanTask
task,
};
}
/**
 * Computes the retry delay, in milliseconds, for a given attempt count.
 *
 * The first attempt gets a short 30 second delay. Every later attempt backs
 * off exponentially from a 5 minute base: 2 -> 5 min, 3 -> 10 min,
 * 4 -> 20 min, doubling with each further attempt.
 *
 * @param attempts - The attempt count to compute the delay for. Any value of
 *   1 or less is treated as the first attempt, so that zero or negative counts
 *   cannot reach the exponential branch with a negative exponent and produce
 *   a fractional share of the 5 minute base.
 * @returns The delay in milliseconds. The result is not capped; it keeps
 *   doubling as `attempts` grows.
 */
export function calculateDelay(attempts: number): number {
  const firstRetryDelay = 30 * 1000; // 30s
  const defaultBackoffPerFailure = 5 * 60 * 1000; // 5 min

  if (attempts <= 1) {
    return firstRetryDelay;
  }

  // Double the 5 minute base for every attempt after the second one.
  return defaultBackoffPerFailure * 2 ** (attempts - 2);
}

View file

@ -338,9 +338,9 @@ export default function ({ getService }: FtrProviderContext) {
await retry.try(async () => {
const scheduledTask = await currentTask(task.id);
expect(scheduledTask.attempts).to.be.greaterThan(0);
expect(scheduledTask.attempts).to.be.greaterThan(1);
expect(Date.parse(scheduledTask.runAt)).to.be.greaterThan(
Date.parse(task.runAt) + 5 * 60 * 1000
Date.parse(task.runAt) + 30 * 1000
);
});
});