Mirror of https://github.com/elastic/kibana.git (synced 2025-04-24 09:48:58 -04:00)

Revert TM resource based task scheduling issues (#189529)

Parent: 1f00087e61
Commit: 32459096ff

70 changed files with 1523 additions and 4985 deletions
@@ -564,39 +564,6 @@ describe('Create Lifecycle', () => {
      });
    });

    test('injects custom cost for certain rule types', () => {
      const ruleType: RuleType<never, never, never, never, never, 'default', 'recovered', {}> = {
        id: 'siem.indicatorRule',
        name: 'Test',
        actionGroups: [
          {
            id: 'default',
            name: 'Default',
          },
        ],
        defaultActionGroupId: 'default',
        minimumLicenseRequired: 'basic',
        isExportable: true,
        executor: jest.fn(),
        category: 'test',
        producer: 'alerts',
        ruleTaskTimeout: '20m',
        validate: {
          params: { validate: (params) => params },
        },
      };
      const registry = new RuleTypeRegistry(ruleTypeRegistryParams);
      registry.register(ruleType);
      expect(taskManager.registerTaskDefinitions).toHaveBeenCalledTimes(1);
      expect(taskManager.registerTaskDefinitions.mock.calls[0][0]).toMatchObject({
        'alerting:siem.indicatorRule': {
          timeout: '20m',
          title: 'Test',
          cost: 10,
        },
      });
    });

    test('shallow clones the given rule type', () => {
      const ruleType: RuleType<never, never, never, never, never, 'default', 'recovered', {}> = {
        id: 'test',
@@ -14,7 +14,6 @@ import { Logger } from '@kbn/core/server';
import { LicensingPluginSetup } from '@kbn/licensing-plugin/server';
import { RunContext, TaskManagerSetupContract } from '@kbn/task-manager-plugin/server';
import { stateSchemaByVersion } from '@kbn/alerting-state-types';
import { TaskCost } from '@kbn/task-manager-plugin/server/task';
import { TaskRunnerFactory } from './task_runner';
import {
  RuleType,

@@ -41,9 +40,6 @@ import { AlertsService } from './alerts_service/alerts_service';
import { getRuleTypeIdValidLegacyConsumers } from './rule_type_registry_deprecated_consumers';
import { AlertingConfig } from './config';

const RULE_TYPES_WITH_CUSTOM_COST: Record<string, TaskCost> = {
  'siem.indicatorRule': TaskCost.ExtraLarge,
};

export interface ConstructorOptions {
  config: AlertingConfig;
  logger: Logger;

@@ -293,8 +289,6 @@ export class RuleTypeRegistry {
      normalizedRuleType as unknown as UntypedNormalizedRuleType
    );

    const taskCost: TaskCost | undefined = RULE_TYPES_WITH_CUSTOM_COST[ruleType.id];

    this.taskManager.registerTaskDefinitions({
      [`alerting:${ruleType.id}`]: {
        title: ruleType.name,

@@ -316,7 +310,6 @@ export class RuleTypeRegistry {
          spaceId: schema.string(),
          consumer: schema.maybe(schema.string()),
        }),
        ...(taskCost ? { cost: taskCost } : {}),
      },
    });
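For context, the hunks above tie the alerting rule type registry to Task Manager's task cost model: `siem.indicatorRule` is registered with `TaskCost.ExtraLarge`, which the test and snapshot in this commit record as the numeric value 10. The sketch below is illustrative only; it is not the `@kbn/task-manager-plugin` definition, and any enum value not evidenced in this diff (Normal = 2, ExtraLarge = 10 are) is an assumption.

```typescript
// Illustrative sketch only - the authoritative TaskCost enum lives in
// @kbn/task-manager-plugin/server/task. Normal = 2 and ExtraLarge = 10 are the
// values this commit's tests rely on ("cost: 10" and "20 as the max allowed cost
// ... based on a capacity of 10").
enum TaskCost {
  Normal = 2,
  ExtraLarge = 10,
}

const RULE_TYPES_WITH_CUSTOM_COST: Record<string, TaskCost> = {
  'siem.indicatorRule': TaskCost.ExtraLarge,
};

// When a rule type with a custom cost is registered, the task definition passed to
// taskManager.registerTaskDefinitions carries that cost, which is what the
// `toMatchObject({ 'alerting:siem.indicatorRule': { cost: 10 } })` assertion checks.
function taskDefinitionFor(ruleTypeId: string, title: string) {
  const taskCost: TaskCost | undefined = RULE_TYPES_WITH_CUSTOM_COST[ruleTypeId];
  return {
    [`alerting:${ruleTypeId}`]: {
      title,
      ...(taskCost ? { cost: taskCost } : {}),
    },
  };
}
```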
@@ -11,8 +11,6 @@
    "task_manager"
  ],
  "optionalPlugins": [
    "cloud",
    "serverless",
    "usageCollection"
  ]
}
@@ -50,7 +50,7 @@ The root `timestamp` is the time in which the summary was exposed (either to the
Follow this step-by-step guide to make sense of the stats: https://www.elastic.co/guide/en/kibana/master/task-manager-troubleshooting.html#task-manager-diagnosing-root-cause

#### The Configuration Section
The `configuration` section summarizes Task Manager's current configuration, including dynamic configurations which change over time, such as `poll_interval` and `capacity` which adjust in reaction to changing load on the system.
The `configuration` section summarizes Task Manager's current configuration, including dynamic configurations which change over time, such as `poll_interval` and `max_workers` which adjust in reaction to changing load on the system.

These are "Hot" stats which are updated whenever a change happens in the configuration.
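For illustration, a rough sketch of the shape this section takes. The field names are taken from the mocked health stats elsewhere in this commit; the timestamp and status values are placeholders, not output from a real cluster.

```typescript
// Rough, illustrative shape of the `configuration` section of the monitored health
// stats. Field names mirror the mocked stats later in this diff; values are placeholders.
interface ConfigurationStatExample {
  timestamp: string;
  value: {
    request_capacity: number;
    monitored_aggregated_stats_refresh_rate: number;
    monitored_stats_running_average_window: number;
    poll_interval: number;
    max_workers: number;
  };
  status: string; // e.g. 'OK'
}

const exampleConfiguration: ConfigurationStatExample = {
  timestamp: '<ISO timestamp of the last configuration change>',
  value: {
    request_capacity: 1000,
    monitored_aggregated_stats_refresh_rate: 5000,
    monitored_stats_running_average_window: 50,
    poll_interval: 3000,
    max_workers: 10,
  },
  status: 'OK',
};
```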
@@ -69,8 +69,8 @@ The `runtime` tracks Task Manager's performance as it runs, making note of task
These include:
- The time it takes a task to run (p50, p90, p95 & p99, using a configurable running average window, `50` by default)
- The average _drift_ that tasks experience (p50, p90, p95 & p99, using the same configurable running average window as above). Drift tells us how long after its scheduled time a task typically executes.
- The average _load_ (p50, p90, p95 & p99, using the same configurable running average window as above). Load tells us what percentage of capacity is in use at the end of each polling cycle.
- The polling rate (the timestamp of the last time a polling cycle completed), the polling health stats (number of version clashes and mismatches) and the result [`No tasks | Filled task pool | Unexpectedly ran out of capacity`] frequency over the past 50 polling cycles (using the same window size as the one used for running averages)
- The average _load_ (p50, p90, p95 & p99, using the same configurable running average window as above). Load tells us what percentage of workers is in use at the end of each polling cycle.
- The polling rate (the timestamp of the last time a polling cycle completed), the polling health stats (number of version clashes and mismatches) and the result [`No tasks | Filled task pool | Unexpectedly ran out of workers`] frequency over the past 50 polling cycles (using the same window size as the one used for running averages)
- The `Success | Retry | Failure ratio` by task type. This is different from the workload stats, which tell you what's in the queue but can't keep track of retries or of non-recurring tasks, as they're wiped off the index when completed.

These are "Hot" stats which are updated reactively as Tasks are executed and interacted with.
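A short, hedged sketch of how the drift and load percentiles described above might be read; the helper and the numbers are made up for the example and are not part of Kibana.

```typescript
// Illustrative only: interpreting the drift and load percentiles from the `runtime`
// section. Field names mirror the stats described above; values are invented.
interface RuntimeSummaryExample {
  drift: { p50: number; p90: number; p95: number; p99: number }; // milliseconds
  load: { p50: number; p90: number; p95: number; p99: number }; // percent in use
}

function describeRuntime(summary: RuntimeSummaryExample): string {
  const { drift, load } = summary;
  return [
    `Half of all tasks start within ${drift.p50}ms of their scheduled time (p50 drift).`,
    `At the end of a typical polling cycle ${load.p50}% of capacity is in use; ` +
      `in the worst 1% of cycles utilization reaches ${load.p99}%.`,
  ].join('\n');
}

// Example usage with made-up numbers:
console.log(
  describeRuntime({
    drift: { p50: 4000, p90: 12000, p95: 30000, p99: 60000 },
    load: { p50: 40, p90: 75, p95: 90, p99: 100 },
  })
);
```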
@@ -23,6 +23,7 @@ describe('config validation', () => {
|
|||
"warn_threshold": 5000,
|
||||
},
|
||||
"max_attempts": 3,
|
||||
"max_workers": 10,
|
||||
"metrics_reset_interval": 30000,
|
||||
"monitored_aggregated_stats_refresh_rate": 60000,
|
||||
"monitored_stats_health_verbose_log": Object {
|
||||
|
@@ -80,6 +81,7 @@ describe('config validation', () => {
|
|||
"warn_threshold": 5000,
|
||||
},
|
||||
"max_attempts": 3,
|
||||
"max_workers": 10,
|
||||
"metrics_reset_interval": 30000,
|
||||
"monitored_aggregated_stats_refresh_rate": 60000,
|
||||
"monitored_stats_health_verbose_log": Object {
|
||||
|
@@ -135,6 +137,7 @@ describe('config validation', () => {
|
|||
"warn_threshold": 5000,
|
||||
},
|
||||
"max_attempts": 3,
|
||||
"max_workers": 10,
|
||||
"metrics_reset_interval": 30000,
|
||||
"monitored_aggregated_stats_refresh_rate": 60000,
|
||||
"monitored_stats_health_verbose_log": Object {
|
||||
|
@@ -8,9 +8,6 @@
import { schema, TypeOf } from '@kbn/config-schema';

export const MAX_WORKERS_LIMIT = 100;
export const DEFAULT_CAPACITY = 10;
export const MAX_CAPACITY = 50;
export const MIN_CAPACITY = 5;
export const DEFAULT_MAX_WORKERS = 10;
export const DEFAULT_POLL_INTERVAL = 3000;
export const DEFAULT_VERSION_CONFLICT_THRESHOLD = 80;

@@ -67,8 +64,6 @@ const requestTimeoutsConfig = schema.object({
export const configSchema = schema.object(
  {
    allow_reading_invalid_state: schema.boolean({ defaultValue: true }),
    /* The number of normal cost tasks that this Kibana instance will run simultaneously */
    capacity: schema.maybe(schema.number({ min: MIN_CAPACITY, max: MAX_CAPACITY })),
    ephemeral_tasks: schema.object({
      enabled: schema.boolean({ defaultValue: false }),
      /* How many requests can Task Manager buffer before it rejects new requests. */

@@ -86,12 +81,11 @@ export const configSchema = schema.object(
      min: 1,
    }),
    /* The maximum number of tasks that this Kibana instance will run simultaneously. */
    max_workers: schema.maybe(
      schema.number({
        // disable the task manager rather than trying to specify it with 0 workers
        min: 1,
      })
    ),
    max_workers: schema.number({
      defaultValue: DEFAULT_MAX_WORKERS,
      // disable the task manager rather than trying to specify it with 0 workers
      min: 1,
    }),
    /* The interval at which monotonically increasing metrics counters will reset */
    metrics_reset_interval: schema.number({
      defaultValue: DEFAULT_METRICS_RESET_INTERVAL,
@@ -18,7 +18,7 @@ import { v4 as uuidv4 } from 'uuid';
|
|||
import { asTaskPollingCycleEvent, asTaskRunEvent, TaskPersistence } from './task_events';
|
||||
import { TaskRunResult } from './task_running';
|
||||
import { TaskPoolRunResult } from './task_pool';
|
||||
import { TaskPoolMock } from './task_pool/task_pool.mock';
|
||||
import { TaskPoolMock } from './task_pool.mock';
|
||||
import { executionContextServiceMock } from '@kbn/core/server/mocks';
|
||||
import { taskManagerMock } from './mocks';
|
||||
|
||||
|
@@ -45,6 +45,7 @@ describe('EphemeralTaskLifecycle', () => {
|
|||
definitions: new TaskTypeDictionary(taskManagerLogger),
|
||||
executionContext,
|
||||
config: {
|
||||
max_workers: 10,
|
||||
max_attempts: 9,
|
||||
poll_interval: 6000000,
|
||||
version_conflict_threshold: 80,
|
||||
|
@@ -155,7 +156,7 @@ describe('EphemeralTaskLifecycle', () => {
|
|||
expect(ephemeralTaskLifecycle.attemptToRun(task)).toMatchObject(asOk(task));
|
||||
|
||||
poolCapacity.mockReturnValue({
|
||||
availableCapacity: 10,
|
||||
availableWorkers: 10,
|
||||
});
|
||||
|
||||
lifecycleEvent$.next(
|
||||
|
@@ -178,7 +179,7 @@ describe('EphemeralTaskLifecycle', () => {
|
|||
expect(ephemeralTaskLifecycle.attemptToRun(task)).toMatchObject(asOk(task));
|
||||
|
||||
poolCapacity.mockReturnValue({
|
||||
availableCapacity: 10,
|
||||
availableWorkers: 10,
|
||||
});
|
||||
|
||||
lifecycleEvent$.next(
|
||||
|
@@ -215,7 +216,7 @@ describe('EphemeralTaskLifecycle', () => {
|
|||
expect(ephemeralTaskLifecycle.attemptToRun(tasks[2])).toMatchObject(asOk(tasks[2]));
|
||||
|
||||
poolCapacity.mockReturnValue({
|
||||
availableCapacity: 2,
|
||||
availableWorkers: 2,
|
||||
});
|
||||
|
||||
lifecycleEvent$.next(
|
||||
|
@@ -255,9 +256,9 @@ describe('EphemeralTaskLifecycle', () => {
|
|||
|
||||
// pool has capacity for both
|
||||
poolCapacity.mockReturnValue({
|
||||
availableCapacity: 10,
|
||||
availableWorkers: 10,
|
||||
});
|
||||
pool.getUsedCapacityByType.mockReturnValue(0);
|
||||
pool.getOccupiedWorkersByType.mockReturnValue(0);
|
||||
|
||||
lifecycleEvent$.next(
|
||||
asTaskPollingCycleEvent(asOk({ result: FillPoolResult.NoTasksClaimed }))
|
||||
|
@@ -295,10 +296,10 @@ describe('EphemeralTaskLifecycle', () => {
|
|||
|
||||
// pool has capacity in general
|
||||
poolCapacity.mockReturnValue({
|
||||
availableCapacity: 2,
|
||||
availableWorkers: 2,
|
||||
});
|
||||
// but when we ask how many it has occupied by type - we always have one worker already occupied by that type
|
||||
pool.getUsedCapacityByType.mockReturnValue(1);
|
||||
pool.getOccupiedWorkersByType.mockReturnValue(1);
|
||||
|
||||
lifecycleEvent$.next(
|
||||
asTaskPollingCycleEvent(asOk({ result: FillPoolResult.NoTasksClaimed }))
|
||||
|
@@ -307,7 +308,7 @@ describe('EphemeralTaskLifecycle', () => {
|
|||
expect(pool.run).toHaveBeenCalledTimes(0);
|
||||
|
||||
// now we release the worker in the pool and cause another cycle in the ephemeral queue
|
||||
pool.getUsedCapacityByType.mockReturnValue(0);
|
||||
pool.getOccupiedWorkersByType.mockReturnValue(0);
|
||||
lifecycleEvent$.next(
|
||||
asTaskPollingCycleEvent(asOk({ result: FillPoolResult.NoTasksClaimed }))
|
||||
);
|
||||
|
@@ -355,9 +356,9 @@ describe('EphemeralTaskLifecycle', () => {
|
|||
|
||||
// pool has capacity for all
|
||||
poolCapacity.mockReturnValue({
|
||||
availableCapacity: 10,
|
||||
availableWorkers: 10,
|
||||
});
|
||||
pool.getUsedCapacityByType.mockReturnValue(0);
|
||||
pool.getOccupiedWorkersByType.mockReturnValue(0);
|
||||
|
||||
lifecycleEvent$.next(asTaskPollingCycleEvent(asOk({ result: FillPoolResult.NoTasksClaimed })));
|
||||
|
||||
|
@@ -388,19 +389,19 @@ describe('EphemeralTaskLifecycle', () => {
|
|||
|
||||
expect(ephemeralTaskLifecycle.queuedTasks).toBe(3);
|
||||
poolCapacity.mockReturnValue({
|
||||
availableCapacity: 1,
|
||||
availableWorkers: 1,
|
||||
});
|
||||
lifecycleEvent$.next(asTaskPollingCycleEvent(asOk({ result: FillPoolResult.NoTasksClaimed })));
|
||||
expect(ephemeralTaskLifecycle.queuedTasks).toBe(2);
|
||||
|
||||
poolCapacity.mockReturnValue({
|
||||
availableCapacity: 1,
|
||||
availableWorkers: 1,
|
||||
});
|
||||
lifecycleEvent$.next(asTaskPollingCycleEvent(asOk({ result: FillPoolResult.NoTasksClaimed })));
|
||||
expect(ephemeralTaskLifecycle.queuedTasks).toBe(1);
|
||||
|
||||
poolCapacity.mockReturnValue({
|
||||
availableCapacity: 1,
|
||||
availableWorkers: 1,
|
||||
});
|
||||
lifecycleEvent$.next(asTaskPollingCycleEvent(asOk({ result: FillPoolResult.NoTasksClaimed })));
|
||||
expect(ephemeralTaskLifecycle.queuedTasks).toBe(0);
|
||||
|
|
|
@@ -143,13 +143,13 @@ export class EphemeralTaskLifecycle {
|
|||
taskType && this.definitions.get(taskType)?.maxConcurrency
|
||||
? Math.max(
|
||||
Math.min(
|
||||
this.pool.availableCapacity(),
|
||||
this.pool.availableWorkers,
|
||||
this.definitions.get(taskType)!.maxConcurrency! -
|
||||
this.pool.getUsedCapacityByType(taskType)
|
||||
this.pool.getOccupiedWorkersByType(taskType)
|
||||
),
|
||||
0
|
||||
)
|
||||
: this.pool.availableCapacity();
|
||||
: this.pool.availableWorkers;
|
||||
|
||||
private emitEvent = (event: TaskLifecycleEvent) => {
|
||||
this.events$.next(event);
|
||||
|
|
|
@@ -55,6 +55,9 @@ export type {
|
|||
|
||||
export const config: PluginConfigDescriptor<TaskManagerConfig> = {
|
||||
schema: configSchema,
|
||||
exposeToUsage: {
|
||||
max_workers: true,
|
||||
},
|
||||
deprecations: ({ deprecate }) => {
|
||||
return [
|
||||
deprecate('ephemeral_tasks.enabled', 'a future version', {
|
||||
|
@@ -65,10 +68,6 @@ export const config: PluginConfigDescriptor<TaskManagerConfig> = {
|
|||
level: 'warning',
|
||||
message: `Configuring "xpack.task_manager.ephemeral_tasks.request_capacity" is deprecated and will be removed in a future version. Remove this setting to increase task execution resiliency.`,
|
||||
}),
|
||||
deprecate('max_workers', 'a future version', {
|
||||
level: 'warning',
|
||||
message: `Configuring "xpack.task_manager.max_workers" is deprecated and will be removed in a future version. Remove this setting and use "xpack.task_manager.capacity" instead.`,
|
||||
}),
|
||||
(settings, fromPath, addDeprecation) => {
|
||||
const taskManager = get(settings, fromPath);
|
||||
if (taskManager?.index) {
|
||||
|
|
|
@@ -1,10 +0,0 @@
|
|||
// Jest Snapshot v1, https://goo.gl/fbAQLP
|
||||
|
||||
exports[`Task cost checks detects tasks with cost definitions 1`] = `
|
||||
Array [
|
||||
Object {
|
||||
"cost": 10,
|
||||
"taskType": "alerting:siem.indicatorRule",
|
||||
},
|
||||
]
|
||||
`;
|
|
@@ -35,362 +35,164 @@ describe('managed configuration', () => {
|
|||
},
|
||||
};
|
||||
|
||||
beforeEach(async () => {
|
||||
jest.resetAllMocks();
|
||||
clock = sinon.useFakeTimers();
|
||||
|
||||
const context = coreMock.createPluginInitializerContext<TaskManagerConfig>({
|
||||
max_workers: 10,
|
||||
max_attempts: 9,
|
||||
poll_interval: 3000,
|
||||
allow_reading_invalid_state: false,
|
||||
version_conflict_threshold: 80,
|
||||
monitored_aggregated_stats_refresh_rate: 60000,
|
||||
monitored_stats_health_verbose_log: {
|
||||
enabled: false,
|
||||
level: 'debug' as const,
|
||||
warn_delayed_task_start_in_seconds: 60,
|
||||
},
|
||||
monitored_stats_required_freshness: 4000,
|
||||
monitored_stats_running_average_window: 50,
|
||||
request_capacity: 1000,
|
||||
monitored_task_execution_thresholds: {
|
||||
default: {
|
||||
error_threshold: 90,
|
||||
warn_threshold: 80,
|
||||
},
|
||||
custom: {},
|
||||
},
|
||||
ephemeral_tasks: {
|
||||
enabled: true,
|
||||
request_capacity: 10,
|
||||
},
|
||||
unsafe: {
|
||||
exclude_task_types: [],
|
||||
authenticate_background_task_utilization: true,
|
||||
},
|
||||
event_loop_delay: {
|
||||
monitor: true,
|
||||
warn_threshold: 5000,
|
||||
},
|
||||
worker_utilization_running_average_window: 5,
|
||||
metrics_reset_interval: 3000,
|
||||
claim_strategy: 'default',
|
||||
request_timeouts: {
|
||||
update_by_query: 1000,
|
||||
},
|
||||
});
|
||||
logger = context.logger.get('taskManager');
|
||||
|
||||
const taskManager = new TaskManagerPlugin(context);
|
||||
(
|
||||
await taskManager.setup(coreMock.createSetup(), { usageCollection: undefined })
|
||||
).registerTaskDefinitions({
|
||||
foo: {
|
||||
title: 'Foo',
|
||||
createTaskRunner: jest.fn(),
|
||||
},
|
||||
});
|
||||
|
||||
const coreStart = coreMock.createStart();
|
||||
coreStart.elasticsearch = esStart;
|
||||
esStart.client.asInternalUser.child.mockReturnValue(
|
||||
esStart.client.asInternalUser as unknown as Client
|
||||
);
|
||||
coreStart.savedObjects.createInternalRepository.mockReturnValue(savedObjectsClient);
|
||||
taskManagerStart = await taskManager.start(coreStart);
|
||||
|
||||
// force rxjs timers to fire when they are scheduled for setTimeout(0) as the
|
||||
// sinon fake timers cause them to stall
|
||||
clock.tick(0);
|
||||
});
|
||||
|
||||
afterEach(() => clock.restore());
|
||||
|
||||
describe('managed poll interval', () => {
|
||||
beforeEach(async () => {
|
||||
jest.resetAllMocks();
|
||||
clock = sinon.useFakeTimers();
|
||||
test('should lower max workers when Elasticsearch returns 429 error', async () => {
|
||||
savedObjectsClient.create.mockRejectedValueOnce(
|
||||
SavedObjectsErrorHelpers.createTooManyRequestsError('a', 'b')
|
||||
);
|
||||
|
||||
const context = coreMock.createPluginInitializerContext<TaskManagerConfig>({
|
||||
capacity: 10,
|
||||
max_attempts: 9,
|
||||
poll_interval: 3000,
|
||||
allow_reading_invalid_state: false,
|
||||
version_conflict_threshold: 80,
|
||||
monitored_aggregated_stats_refresh_rate: 60000,
|
||||
monitored_stats_health_verbose_log: {
|
||||
enabled: false,
|
||||
level: 'debug' as const,
|
||||
warn_delayed_task_start_in_seconds: 60,
|
||||
},
|
||||
monitored_stats_required_freshness: 4000,
|
||||
monitored_stats_running_average_window: 50,
|
||||
request_capacity: 1000,
|
||||
monitored_task_execution_thresholds: {
|
||||
default: {
|
||||
error_threshold: 90,
|
||||
warn_threshold: 80,
|
||||
},
|
||||
custom: {},
|
||||
},
|
||||
ephemeral_tasks: {
|
||||
enabled: true,
|
||||
request_capacity: 10,
|
||||
},
|
||||
unsafe: {
|
||||
exclude_task_types: [],
|
||||
authenticate_background_task_utilization: true,
|
||||
},
|
||||
event_loop_delay: {
|
||||
monitor: true,
|
||||
warn_threshold: 5000,
|
||||
},
|
||||
worker_utilization_running_average_window: 5,
|
||||
metrics_reset_interval: 3000,
|
||||
claim_strategy: 'default',
|
||||
request_timeouts: {
|
||||
update_by_query: 1000,
|
||||
},
|
||||
});
|
||||
logger = context.logger.get('taskManager');
|
||||
// Cause "too many requests" error to be thrown
|
||||
await expect(
|
||||
taskManagerStart.schedule({
|
||||
taskType: 'foo',
|
||||
state: {},
|
||||
params: {},
|
||||
})
|
||||
).rejects.toThrowErrorMatchingInlineSnapshot(`"Too Many Requests"`);
|
||||
clock.tick(ADJUST_THROUGHPUT_INTERVAL);
|
||||
|
||||
const taskManager = new TaskManagerPlugin(context);
|
||||
(
|
||||
await taskManager.setup(coreMock.createSetup(), { usageCollection: undefined })
|
||||
).registerTaskDefinitions({
|
||||
foo: {
|
||||
title: 'Foo',
|
||||
createTaskRunner: jest.fn(),
|
||||
},
|
||||
});
|
||||
|
||||
const coreStart = coreMock.createStart();
|
||||
coreStart.elasticsearch = esStart;
|
||||
esStart.client.asInternalUser.child.mockReturnValue(
|
||||
esStart.client.asInternalUser as unknown as Client
|
||||
);
|
||||
coreStart.savedObjects.createInternalRepository.mockReturnValue(savedObjectsClient);
|
||||
taskManagerStart = await taskManager.start(coreStart, {});
|
||||
|
||||
// force rxjs timers to fire when they are scheduled for setTimeout(0) as the
|
||||
// sinon fake timers cause them to stall
|
||||
clock.tick(0);
|
||||
});
|
||||
|
||||
test('should increase poll interval when Elasticsearch returns 429 error', async () => {
|
||||
savedObjectsClient.create.mockRejectedValueOnce(
|
||||
SavedObjectsErrorHelpers.createTooManyRequestsError('a', 'b')
|
||||
);
|
||||
|
||||
// Cause "too many requests" error to be thrown
|
||||
await expect(
|
||||
taskManagerStart.schedule({
|
||||
taskType: 'foo',
|
||||
state: {},
|
||||
params: {},
|
||||
})
|
||||
).rejects.toThrowErrorMatchingInlineSnapshot(`"Too Many Requests"`);
|
||||
clock.tick(ADJUST_THROUGHPUT_INTERVAL);
|
||||
|
||||
expect(logger.warn).toHaveBeenCalledWith(
|
||||
'Poll interval configuration is temporarily increased after Elasticsearch returned 1 "too many request" and/or "execute [inline] script" error(s).'
|
||||
);
|
||||
expect(logger.debug).toHaveBeenCalledWith(
|
||||
'Poll interval configuration changing from 3000 to 3600 after seeing 1 "too many request" and/or "execute [inline] script" error(s)'
|
||||
);
|
||||
expect(logger.debug).toHaveBeenCalledWith('Task poller now using interval of 3600ms');
|
||||
});
|
||||
|
||||
test('should increase poll interval when Elasticsearch returns "cannot execute [inline] scripts" error', async () => {
|
||||
const childEsClient = esStart.client.asInternalUser.child({}) as jest.Mocked<Client>;
|
||||
childEsClient.search.mockImplementationOnce(async () => {
|
||||
throw inlineScriptError;
|
||||
});
|
||||
|
||||
await expect(taskManagerStart.fetch({})).rejects.toThrowErrorMatchingInlineSnapshot(
|
||||
`"cannot execute [inline] scripts\\" error"`
|
||||
);
|
||||
|
||||
clock.tick(ADJUST_THROUGHPUT_INTERVAL);
|
||||
|
||||
expect(logger.warn).toHaveBeenCalledWith(
|
||||
'Poll interval configuration is temporarily increased after Elasticsearch returned 1 "too many request" and/or "execute [inline] script" error(s).'
|
||||
);
|
||||
expect(logger.debug).toHaveBeenCalledWith(
|
||||
'Poll interval configuration changing from 3000 to 3600 after seeing 1 "too many request" and/or "execute [inline] script" error(s)'
|
||||
);
|
||||
expect(logger.debug).toHaveBeenCalledWith('Task poller now using interval of 3600ms');
|
||||
});
|
||||
expect(logger.warn).toHaveBeenCalledWith(
|
||||
'Max workers configuration is temporarily reduced after Elasticsearch returned 1 "too many request" and/or "execute [inline] script" error(s).'
|
||||
);
|
||||
expect(logger.debug).toHaveBeenCalledWith(
|
||||
'Max workers configuration changing from 10 to 8 after seeing 1 "too many request" and/or "execute [inline] script" error(s)'
|
||||
);
|
||||
expect(logger.debug).toHaveBeenCalledWith('Task pool now using 10 as the max worker value');
|
||||
});
|
||||
|
||||
describe('managed capacity with default claim strategy', () => {
|
||||
beforeEach(async () => {
|
||||
jest.resetAllMocks();
|
||||
clock = sinon.useFakeTimers();
|
||||
test('should increase poll interval when Elasticsearch returns 429 error', async () => {
|
||||
savedObjectsClient.create.mockRejectedValueOnce(
|
||||
SavedObjectsErrorHelpers.createTooManyRequestsError('a', 'b')
|
||||
);
|
||||
|
||||
const context = coreMock.createPluginInitializerContext<TaskManagerConfig>({
|
||||
capacity: 10,
|
||||
max_attempts: 9,
|
||||
poll_interval: 3000,
|
||||
allow_reading_invalid_state: false,
|
||||
version_conflict_threshold: 80,
|
||||
monitored_aggregated_stats_refresh_rate: 60000,
|
||||
monitored_stats_health_verbose_log: {
|
||||
enabled: false,
|
||||
level: 'debug' as const,
|
||||
warn_delayed_task_start_in_seconds: 60,
|
||||
},
|
||||
monitored_stats_required_freshness: 4000,
|
||||
monitored_stats_running_average_window: 50,
|
||||
request_capacity: 1000,
|
||||
monitored_task_execution_thresholds: {
|
||||
default: {
|
||||
error_threshold: 90,
|
||||
warn_threshold: 80,
|
||||
},
|
||||
custom: {},
|
||||
},
|
||||
ephemeral_tasks: {
|
||||
enabled: true,
|
||||
request_capacity: 10,
|
||||
},
|
||||
unsafe: {
|
||||
exclude_task_types: [],
|
||||
authenticate_background_task_utilization: true,
|
||||
},
|
||||
event_loop_delay: {
|
||||
monitor: true,
|
||||
warn_threshold: 5000,
|
||||
},
|
||||
worker_utilization_running_average_window: 5,
|
||||
metrics_reset_interval: 3000,
|
||||
claim_strategy: 'default',
|
||||
request_timeouts: {
|
||||
update_by_query: 1000,
|
||||
},
|
||||
});
|
||||
logger = context.logger.get('taskManager');
|
||||
// Cause "too many requests" error to be thrown
|
||||
await expect(
|
||||
taskManagerStart.schedule({
|
||||
taskType: 'foo',
|
||||
state: {},
|
||||
params: {},
|
||||
})
|
||||
).rejects.toThrowErrorMatchingInlineSnapshot(`"Too Many Requests"`);
|
||||
clock.tick(ADJUST_THROUGHPUT_INTERVAL);
|
||||
|
||||
const taskManager = new TaskManagerPlugin(context);
|
||||
(
|
||||
await taskManager.setup(coreMock.createSetup(), { usageCollection: undefined })
|
||||
).registerTaskDefinitions({
|
||||
foo: {
|
||||
title: 'Foo',
|
||||
createTaskRunner: jest.fn(),
|
||||
},
|
||||
});
|
||||
|
||||
const coreStart = coreMock.createStart();
|
||||
coreStart.elasticsearch = esStart;
|
||||
esStart.client.asInternalUser.child.mockReturnValue(
|
||||
esStart.client.asInternalUser as unknown as Client
|
||||
);
|
||||
coreStart.savedObjects.createInternalRepository.mockReturnValue(savedObjectsClient);
|
||||
taskManagerStart = await taskManager.start(coreStart, {});
|
||||
|
||||
// force rxjs timers to fire when they are scheduled for setTimeout(0) as the
|
||||
// sinon fake timers cause them to stall
|
||||
clock.tick(0);
|
||||
});
|
||||
|
||||
test('should lower capacity when Elasticsearch returns 429 error', async () => {
|
||||
savedObjectsClient.create.mockRejectedValueOnce(
|
||||
SavedObjectsErrorHelpers.createTooManyRequestsError('a', 'b')
|
||||
);
|
||||
|
||||
// Cause "too many requests" error to be thrown
|
||||
await expect(
|
||||
taskManagerStart.schedule({
|
||||
taskType: 'foo',
|
||||
state: {},
|
||||
params: {},
|
||||
})
|
||||
).rejects.toThrowErrorMatchingInlineSnapshot(`"Too Many Requests"`);
|
||||
clock.tick(ADJUST_THROUGHPUT_INTERVAL);
|
||||
|
||||
expect(logger.warn).toHaveBeenCalledWith(
|
||||
'Capacity configuration is temporarily reduced after Elasticsearch returned 1 "too many request" and/or "execute [inline] script" error(s).'
|
||||
);
|
||||
expect(logger.debug).toHaveBeenCalledWith(
|
||||
'Capacity configuration changing from 10 to 8 after seeing 1 "too many request" and/or "execute [inline] script" error(s)'
|
||||
);
|
||||
expect(logger.debug).toHaveBeenCalledWith(
|
||||
'Task pool now using 10 as the max worker value which is based on a capacity of 10'
|
||||
);
|
||||
});
|
||||
|
||||
test('should lower capacity when Elasticsearch returns "cannot execute [inline] scripts" error', async () => {
|
||||
const childEsClient = esStart.client.asInternalUser.child({}) as jest.Mocked<Client>;
|
||||
childEsClient.search.mockImplementationOnce(async () => {
|
||||
throw inlineScriptError;
|
||||
});
|
||||
|
||||
await expect(taskManagerStart.fetch({})).rejects.toThrowErrorMatchingInlineSnapshot(
|
||||
`"cannot execute [inline] scripts\\" error"`
|
||||
);
|
||||
clock.tick(ADJUST_THROUGHPUT_INTERVAL);
|
||||
|
||||
expect(logger.warn).toHaveBeenCalledWith(
|
||||
'Capacity configuration is temporarily reduced after Elasticsearch returned 1 "too many request" and/or "execute [inline] script" error(s).'
|
||||
);
|
||||
expect(logger.debug).toHaveBeenCalledWith(
|
||||
'Capacity configuration changing from 10 to 8 after seeing 1 "too many request" and/or "execute [inline] script" error(s)'
|
||||
);
|
||||
expect(logger.debug).toHaveBeenCalledWith(
|
||||
'Task pool now using 10 as the max worker value which is based on a capacity of 10'
|
||||
);
|
||||
});
|
||||
expect(logger.warn).toHaveBeenCalledWith(
|
||||
'Poll interval configuration is temporarily increased after Elasticsearch returned 1 "too many request" and/or "execute [inline] script" error(s).'
|
||||
);
|
||||
expect(logger.debug).toHaveBeenCalledWith(
|
||||
'Poll interval configuration changing from 3000 to 3600 after seeing 1 "too many request" and/or "execute [inline] script" error(s)'
|
||||
);
|
||||
expect(logger.debug).toHaveBeenCalledWith('Task poller now using interval of 3600ms');
|
||||
});
|
||||
|
||||
describe('managed capacity with mget claim strategy', () => {
|
||||
beforeEach(async () => {
|
||||
jest.resetAllMocks();
|
||||
clock = sinon.useFakeTimers();
|
||||
|
||||
const context = coreMock.createPluginInitializerContext<TaskManagerConfig>({
|
||||
capacity: 10,
|
||||
max_attempts: 9,
|
||||
poll_interval: 3000,
|
||||
allow_reading_invalid_state: false,
|
||||
version_conflict_threshold: 80,
|
||||
monitored_aggregated_stats_refresh_rate: 60000,
|
||||
monitored_stats_health_verbose_log: {
|
||||
enabled: false,
|
||||
level: 'debug' as const,
|
||||
warn_delayed_task_start_in_seconds: 60,
|
||||
},
|
||||
monitored_stats_required_freshness: 4000,
|
||||
monitored_stats_running_average_window: 50,
|
||||
request_capacity: 1000,
|
||||
monitored_task_execution_thresholds: {
|
||||
default: {
|
||||
error_threshold: 90,
|
||||
warn_threshold: 80,
|
||||
},
|
||||
custom: {},
|
||||
},
|
||||
ephemeral_tasks: {
|
||||
enabled: true,
|
||||
request_capacity: 10,
|
||||
},
|
||||
unsafe: {
|
||||
exclude_task_types: [],
|
||||
authenticate_background_task_utilization: true,
|
||||
},
|
||||
event_loop_delay: {
|
||||
monitor: true,
|
||||
warn_threshold: 5000,
|
||||
},
|
||||
worker_utilization_running_average_window: 5,
|
||||
metrics_reset_interval: 3000,
|
||||
claim_strategy: 'unsafe_mget',
|
||||
request_timeouts: {
|
||||
update_by_query: 1000,
|
||||
},
|
||||
});
|
||||
logger = context.logger.get('taskManager');
|
||||
|
||||
const taskManager = new TaskManagerPlugin(context);
|
||||
(
|
||||
await taskManager.setup(coreMock.createSetup(), { usageCollection: undefined })
|
||||
).registerTaskDefinitions({
|
||||
foo: {
|
||||
title: 'Foo',
|
||||
createTaskRunner: jest.fn(),
|
||||
},
|
||||
});
|
||||
|
||||
const coreStart = coreMock.createStart();
|
||||
coreStart.elasticsearch = esStart;
|
||||
esStart.client.asInternalUser.child.mockReturnValue(
|
||||
esStart.client.asInternalUser as unknown as Client
|
||||
);
|
||||
coreStart.savedObjects.createInternalRepository.mockReturnValue(savedObjectsClient);
|
||||
taskManagerStart = await taskManager.start(coreStart, {});
|
||||
|
||||
// force rxjs timers to fire when they are scheduled for setTimeout(0) as the
|
||||
// sinon fake timers cause them to stall
|
||||
clock.tick(0);
|
||||
test('should lower max workers when Elasticsearch returns "cannot execute [inline] scripts" error', async () => {
|
||||
const childEsClient = esStart.client.asInternalUser.child({}) as jest.Mocked<Client>;
|
||||
childEsClient.search.mockImplementationOnce(async () => {
|
||||
throw inlineScriptError;
|
||||
});
|
||||
|
||||
test('should lower capacity when Elasticsearch returns 429 error', async () => {
|
||||
savedObjectsClient.create.mockRejectedValueOnce(
|
||||
SavedObjectsErrorHelpers.createTooManyRequestsError('a', 'b')
|
||||
);
|
||||
await expect(taskManagerStart.fetch({})).rejects.toThrowErrorMatchingInlineSnapshot(
|
||||
`"cannot execute [inline] scripts\\" error"`
|
||||
);
|
||||
clock.tick(ADJUST_THROUGHPUT_INTERVAL);
|
||||
|
||||
// Cause "too many requests" error to be thrown
|
||||
await expect(
|
||||
taskManagerStart.schedule({
|
||||
taskType: 'foo',
|
||||
state: {},
|
||||
params: {},
|
||||
})
|
||||
).rejects.toThrowErrorMatchingInlineSnapshot(`"Too Many Requests"`);
|
||||
clock.tick(ADJUST_THROUGHPUT_INTERVAL);
|
||||
expect(logger.warn).toHaveBeenCalledWith(
|
||||
'Max workers configuration is temporarily reduced after Elasticsearch returned 1 "too many request" and/or "execute [inline] script" error(s).'
|
||||
);
|
||||
expect(logger.debug).toHaveBeenCalledWith(
|
||||
'Max workers configuration changing from 10 to 8 after seeing 1 "too many request" and/or "execute [inline] script" error(s)'
|
||||
);
|
||||
expect(logger.debug).toHaveBeenCalledWith('Task pool now using 10 as the max worker value');
|
||||
});
|
||||
|
||||
expect(logger.warn).toHaveBeenCalledWith(
|
||||
'Capacity configuration is temporarily reduced after Elasticsearch returned 1 "too many request" and/or "execute [inline] script" error(s).'
|
||||
);
|
||||
expect(logger.debug).toHaveBeenCalledWith(
|
||||
'Capacity configuration changing from 10 to 8 after seeing 1 "too many request" and/or "execute [inline] script" error(s)'
|
||||
);
|
||||
expect(logger.debug).toHaveBeenCalledWith(
|
||||
'Task pool now using 20 as the max allowed cost which is based on a capacity of 10'
|
||||
);
|
||||
test('should increase poll interval when Elasticsearch returns "cannot execute [inline] scripts" error', async () => {
|
||||
const childEsClient = esStart.client.asInternalUser.child({}) as jest.Mocked<Client>;
|
||||
childEsClient.search.mockImplementationOnce(async () => {
|
||||
throw inlineScriptError;
|
||||
});
|
||||
|
||||
test('should lower capacity when Elasticsearch returns "cannot execute [inline] scripts" error', async () => {
|
||||
const childEsClient = esStart.client.asInternalUser.child({}) as jest.Mocked<Client>;
|
||||
childEsClient.search.mockImplementationOnce(async () => {
|
||||
throw inlineScriptError;
|
||||
});
|
||||
await expect(taskManagerStart.fetch({})).rejects.toThrowErrorMatchingInlineSnapshot(
|
||||
`"cannot execute [inline] scripts\\" error"`
|
||||
);
|
||||
|
||||
await expect(taskManagerStart.fetch({})).rejects.toThrowErrorMatchingInlineSnapshot(
|
||||
`"cannot execute [inline] scripts\\" error"`
|
||||
);
|
||||
clock.tick(ADJUST_THROUGHPUT_INTERVAL);
|
||||
clock.tick(ADJUST_THROUGHPUT_INTERVAL);
|
||||
|
||||
expect(logger.warn).toHaveBeenCalledWith(
|
||||
'Capacity configuration is temporarily reduced after Elasticsearch returned 1 "too many request" and/or "execute [inline] script" error(s).'
|
||||
);
|
||||
expect(logger.debug).toHaveBeenCalledWith(
|
||||
'Capacity configuration changing from 10 to 8 after seeing 1 "too many request" and/or "execute [inline] script" error(s)'
|
||||
);
|
||||
expect(logger.debug).toHaveBeenCalledWith(
|
||||
'Task pool now using 20 as the max allowed cost which is based on a capacity of 10'
|
||||
);
|
||||
});
|
||||
expect(logger.warn).toHaveBeenCalledWith(
|
||||
'Poll interval configuration is temporarily increased after Elasticsearch returned 1 "too many request" and/or "execute [inline] script" error(s).'
|
||||
);
|
||||
expect(logger.debug).toHaveBeenCalledWith(
|
||||
'Poll interval configuration changing from 3000 to 3600 after seeing 1 "too many request" and/or "execute [inline] script" error(s)'
|
||||
);
|
||||
expect(logger.debug).toHaveBeenCalledWith('Task poller now using interval of 3600ms');
|
||||
});
|
||||
});
|
||||
|
|
|
@@ -1,63 +0,0 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import {
|
||||
type TestElasticsearchUtils,
|
||||
type TestKibanaUtils,
|
||||
} from '@kbn/core-test-helpers-kbn-server';
|
||||
import { TaskCost, TaskDefinition } from '../task';
|
||||
import { setupTestServers } from './lib';
|
||||
import { TaskTypeDictionary } from '../task_type_dictionary';
|
||||
|
||||
jest.mock('../task_type_dictionary', () => {
|
||||
const actual = jest.requireActual('../task_type_dictionary');
|
||||
return {
|
||||
...actual,
|
||||
TaskTypeDictionary: jest.fn().mockImplementation((opts) => {
|
||||
return new actual.TaskTypeDictionary(opts);
|
||||
}),
|
||||
};
|
||||
});
|
||||
|
||||
// Notify response-ops if a task sets a cost to something other than `Normal`
|
||||
describe('Task cost checks', () => {
|
||||
let esServer: TestElasticsearchUtils;
|
||||
let kibanaServer: TestKibanaUtils;
|
||||
let taskTypeDictionary: TaskTypeDictionary;
|
||||
|
||||
beforeAll(async () => {
|
||||
const setupResult = await setupTestServers();
|
||||
esServer = setupResult.esServer;
|
||||
kibanaServer = setupResult.kibanaServer;
|
||||
|
||||
const mockedTaskTypeDictionary = jest.requireMock('../task_type_dictionary');
|
||||
expect(mockedTaskTypeDictionary.TaskTypeDictionary).toHaveBeenCalledTimes(1);
|
||||
taskTypeDictionary = mockedTaskTypeDictionary.TaskTypeDictionary.mock.results[0].value;
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
if (kibanaServer) {
|
||||
await kibanaServer.stop();
|
||||
}
|
||||
if (esServer) {
|
||||
await esServer.stop();
|
||||
}
|
||||
});
|
||||
|
||||
it('detects tasks with cost definitions', async () => {
|
||||
const taskTypes = taskTypeDictionary.getAllDefinitions();
|
||||
const taskTypesWithCost = taskTypes
|
||||
.map((taskType: TaskDefinition) =>
|
||||
!!taskType.cost ? { taskType: taskType.type, cost: taskType.cost } : null
|
||||
)
|
||||
.filter(
|
||||
(tt: { taskType: string; cost: TaskCost } | null) =>
|
||||
null != tt && tt.cost !== TaskCost.Normal
|
||||
);
|
||||
expect(taskTypesWithCost).toMatchSnapshot();
|
||||
});
|
||||
});
|
|
@@ -16,6 +16,7 @@ Date.now = jest.fn().mockReturnValue(new Date(now));
|
|||
const logger = loggingSystemMock.create().get();
|
||||
const config = {
|
||||
enabled: true,
|
||||
max_workers: 10,
|
||||
index: 'foo',
|
||||
max_attempts: 9,
|
||||
poll_interval: 3000,
|
||||
|
@@ -72,8 +73,6 @@ const getStatsWithTimestamp = ({
|
|||
configuration: {
|
||||
timestamp,
|
||||
value: {
|
||||
capacity: { config: 10, as_cost: 20, as_workers: 10 },
|
||||
claim_strategy: 'default',
|
||||
request_capacity: 1000,
|
||||
monitored_aggregated_stats_refresh_rate: 5000,
|
||||
monitored_stats_running_average_window: 50,
|
||||
|
@@ -85,6 +84,7 @@ const getStatsWithTimestamp = ({
|
|||
},
|
||||
},
|
||||
poll_interval: 3000,
|
||||
max_workers: 10,
|
||||
},
|
||||
status: HealthStatus.OK,
|
||||
},
|
||||
|
@@ -213,29 +213,24 @@ const getStatsWithTimestamp = ({
|
|||
timestamp,
|
||||
value: {
|
||||
count: 2,
|
||||
cost: 4,
|
||||
task_types: {
|
||||
taskType1: {
|
||||
count: 1,
|
||||
cost: 2,
|
||||
status: {
|
||||
idle: 1,
|
||||
},
|
||||
},
|
||||
taskType2: {
|
||||
count: 1,
|
||||
cost: 2,
|
||||
status: {
|
||||
idle: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
non_recurring: 2,
|
||||
non_recurring_cost: 4,
|
||||
owner_ids: 0,
|
||||
schedule: [['5m', 2]],
|
||||
overdue: 0,
|
||||
overdue_cost: 0,
|
||||
overdue_non_recurring: 0,
|
||||
estimated_schedule_density: [
|
||||
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
|
||||
|
|
|
@@ -13,7 +13,6 @@ import {
|
|||
ADJUST_THROUGHPUT_INTERVAL,
|
||||
} from './create_managed_configuration';
|
||||
import { mockLogger } from '../test_utils';
|
||||
import { CLAIM_STRATEGY_DEFAULT, CLAIM_STRATEGY_MGET, TaskManagerConfig } from '../config';
|
||||
|
||||
describe('createManagedConfiguration()', () => {
|
||||
let clock: sinon.SinonFakeTimers;
|
||||
|
@@ -27,141 +26,51 @@ describe('createManagedConfiguration()', () => {
|
|||
afterEach(() => clock.restore());
|
||||
|
||||
test('returns observables with initialized values', async () => {
|
||||
const capacitySubscription = jest.fn();
|
||||
const maxWorkersSubscription = jest.fn();
|
||||
const pollIntervalSubscription = jest.fn();
|
||||
const { capacityConfiguration$, pollIntervalConfiguration$ } = createManagedConfiguration({
|
||||
const { maxWorkersConfiguration$, pollIntervalConfiguration$ } = createManagedConfiguration({
|
||||
logger,
|
||||
errors$: new Subject<Error>(),
|
||||
config: {
|
||||
capacity: 20,
|
||||
poll_interval: 2,
|
||||
} as TaskManagerConfig,
|
||||
startingMaxWorkers: 1,
|
||||
startingPollInterval: 2,
|
||||
});
|
||||
capacityConfiguration$.subscribe(capacitySubscription);
|
||||
maxWorkersConfiguration$.subscribe(maxWorkersSubscription);
|
||||
pollIntervalConfiguration$.subscribe(pollIntervalSubscription);
|
||||
expect(capacitySubscription).toHaveBeenCalledTimes(1);
|
||||
expect(capacitySubscription).toHaveBeenNthCalledWith(1, 20);
|
||||
expect(maxWorkersSubscription).toHaveBeenCalledTimes(1);
|
||||
expect(maxWorkersSubscription).toHaveBeenNthCalledWith(1, 1);
|
||||
expect(pollIntervalSubscription).toHaveBeenCalledTimes(1);
|
||||
expect(pollIntervalSubscription).toHaveBeenNthCalledWith(1, 2);
|
||||
});
|
||||
|
||||
test('uses max_workers config as capacity if only max workers is defined', async () => {
|
||||
const capacitySubscription = jest.fn();
|
||||
const pollIntervalSubscription = jest.fn();
|
||||
const { capacityConfiguration$, pollIntervalConfiguration$ } = createManagedConfiguration({
|
||||
logger,
|
||||
errors$: new Subject<Error>(),
|
||||
config: {
|
||||
max_workers: 10,
|
||||
poll_interval: 2,
|
||||
} as TaskManagerConfig,
|
||||
});
|
||||
capacityConfiguration$.subscribe(capacitySubscription);
|
||||
pollIntervalConfiguration$.subscribe(pollIntervalSubscription);
|
||||
expect(capacitySubscription).toHaveBeenCalledTimes(1);
|
||||
expect(capacitySubscription).toHaveBeenNthCalledWith(1, 10);
|
||||
expect(pollIntervalSubscription).toHaveBeenCalledTimes(1);
|
||||
expect(pollIntervalSubscription).toHaveBeenNthCalledWith(1, 2);
|
||||
});
|
||||
|
||||
test('uses max_workers config as capacity but does not exceed MAX_CAPACITY', async () => {
|
||||
const capacitySubscription = jest.fn();
|
||||
const pollIntervalSubscription = jest.fn();
|
||||
const { capacityConfiguration$, pollIntervalConfiguration$ } = createManagedConfiguration({
|
||||
logger,
|
||||
errors$: new Subject<Error>(),
|
||||
config: {
|
||||
max_workers: 1000,
|
||||
poll_interval: 2,
|
||||
} as TaskManagerConfig,
|
||||
});
|
||||
capacityConfiguration$.subscribe(capacitySubscription);
|
||||
pollIntervalConfiguration$.subscribe(pollIntervalSubscription);
|
||||
expect(capacitySubscription).toHaveBeenCalledTimes(1);
|
||||
expect(capacitySubscription).toHaveBeenNthCalledWith(1, 50);
|
||||
expect(pollIntervalSubscription).toHaveBeenCalledTimes(1);
|
||||
expect(pollIntervalSubscription).toHaveBeenNthCalledWith(1, 2);
|
||||
});
|
||||
|
||||
test('uses provided defaultCapacity if neither capacity nor max_workers is defined', async () => {
|
||||
const capacitySubscription = jest.fn();
|
||||
const pollIntervalSubscription = jest.fn();
|
||||
const { capacityConfiguration$, pollIntervalConfiguration$ } = createManagedConfiguration({
|
||||
defaultCapacity: 500,
|
||||
logger,
|
||||
errors$: new Subject<Error>(),
|
||||
config: {
|
||||
poll_interval: 2,
|
||||
} as TaskManagerConfig,
|
||||
});
|
||||
capacityConfiguration$.subscribe(capacitySubscription);
|
||||
pollIntervalConfiguration$.subscribe(pollIntervalSubscription);
|
||||
expect(capacitySubscription).toHaveBeenCalledTimes(1);
|
||||
expect(capacitySubscription).toHaveBeenNthCalledWith(1, 500);
|
||||
expect(pollIntervalSubscription).toHaveBeenCalledTimes(1);
|
||||
expect(pollIntervalSubscription).toHaveBeenNthCalledWith(1, 2);
|
||||
});
|
||||
|
||||
test('logs warning and uses capacity config if both capacity and max_workers is defined', async () => {
|
||||
const capacitySubscription = jest.fn();
|
||||
const pollIntervalSubscription = jest.fn();
|
||||
const { capacityConfiguration$, pollIntervalConfiguration$ } = createManagedConfiguration({
|
||||
logger,
|
||||
errors$: new Subject<Error>(),
|
||||
config: {
|
||||
capacity: 30,
|
||||
max_workers: 10,
|
||||
poll_interval: 2,
|
||||
} as TaskManagerConfig,
|
||||
});
|
||||
capacityConfiguration$.subscribe(capacitySubscription);
|
||||
pollIntervalConfiguration$.subscribe(pollIntervalSubscription);
|
||||
expect(capacitySubscription).toHaveBeenCalledTimes(1);
|
||||
expect(capacitySubscription).toHaveBeenNthCalledWith(1, 30);
|
||||
expect(pollIntervalSubscription).toHaveBeenCalledTimes(1);
|
||||
expect(pollIntervalSubscription).toHaveBeenNthCalledWith(1, 2);
|
||||
expect(logger.warn).toHaveBeenCalledWith(
|
||||
`Both \"xpack.task_manager.capacity\" and \"xpack.task_manager.max_workers\" configs are set, max_workers will be ignored in favor of capacity and the setting should be removed.`
|
||||
);
|
||||
});
|
||||
|
||||
test(`skips errors that aren't about too many requests`, async () => {
|
||||
const capacitySubscription = jest.fn();
|
||||
const maxWorkersSubscription = jest.fn();
|
||||
const pollIntervalSubscription = jest.fn();
|
||||
const errors$ = new Subject<Error>();
|
||||
const { capacityConfiguration$, pollIntervalConfiguration$ } = createManagedConfiguration({
|
||||
const { maxWorkersConfiguration$, pollIntervalConfiguration$ } = createManagedConfiguration({
|
||||
errors$,
|
||||
logger,
|
||||
config: {
|
||||
capacity: 10,
|
||||
poll_interval: 100,
|
||||
} as TaskManagerConfig,
|
||||
startingMaxWorkers: 100,
|
||||
startingPollInterval: 100,
|
||||
});
|
||||
capacityConfiguration$.subscribe(capacitySubscription);
|
||||
maxWorkersConfiguration$.subscribe(maxWorkersSubscription);
|
||||
pollIntervalConfiguration$.subscribe(pollIntervalSubscription);
|
||||
errors$.next(new Error('foo'));
|
||||
clock.tick(ADJUST_THROUGHPUT_INTERVAL);
|
||||
expect(capacitySubscription).toHaveBeenCalledTimes(1);
|
||||
expect(maxWorkersSubscription).toHaveBeenCalledTimes(1);
|
||||
expect(pollIntervalSubscription).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
describe('capacity configuration', () => {
|
||||
function setupScenario(
|
||||
startingCapacity: number,
|
||||
claimStrategy: string = CLAIM_STRATEGY_DEFAULT
|
||||
) {
|
||||
describe('maxWorker configuration', () => {
|
||||
function setupScenario(startingMaxWorkers: number) {
|
||||
const errors$ = new Subject<Error>();
|
||||
const subscription = jest.fn();
|
||||
const { capacityConfiguration$ } = createManagedConfiguration({
|
||||
const { maxWorkersConfiguration$ } = createManagedConfiguration({
|
||||
errors$,
|
||||
startingMaxWorkers,
|
||||
logger,
|
||||
config: {
|
||||
capacity: startingCapacity,
|
||||
poll_interval: 1,
|
||||
claim_strategy: claimStrategy,
|
||||
} as TaskManagerConfig,
|
||||
startingPollInterval: 1,
|
||||
});
|
||||
capacityConfiguration$.subscribe(subscription);
|
||||
maxWorkersConfiguration$.subscribe(subscription);
|
||||
return { subscription, errors$ };
|
||||
}
|
||||
|
||||
|
@@ -172,103 +81,66 @@ describe('createManagedConfiguration()', () => {
|
|||
|
||||
afterEach(() => clock.restore());
|
||||
|
||||
describe('default claim strategy', () => {
|
||||
test('should decrease configuration at the next interval when an error is emitted', async () => {
|
||||
const { subscription, errors$ } = setupScenario(10);
|
||||
errors$.next(SavedObjectsErrorHelpers.createTooManyRequestsError('a', 'b'));
|
||||
clock.tick(ADJUST_THROUGHPUT_INTERVAL - 1);
|
||||
expect(subscription).toHaveBeenCalledTimes(1);
|
||||
expect(subscription).toHaveBeenNthCalledWith(1, 10);
|
||||
clock.tick(1);
|
||||
expect(subscription).toHaveBeenCalledTimes(2);
|
||||
expect(subscription).toHaveBeenNthCalledWith(2, 8);
|
||||
});
|
||||
|
||||
test('should log a warning when the configuration changes from the starting value', async () => {
|
||||
const { errors$ } = setupScenario(10);
|
||||
errors$.next(SavedObjectsErrorHelpers.createTooManyRequestsError('a', 'b'));
|
||||
clock.tick(ADJUST_THROUGHPUT_INTERVAL);
|
||||
expect(logger.warn).toHaveBeenCalledWith(
|
||||
'Capacity configuration is temporarily reduced after Elasticsearch returned 1 "too many request" and/or "execute [inline] script" error(s).'
|
||||
);
|
||||
});
|
||||
|
||||
test('should increase configuration back to normal incrementally after an error is emitted', async () => {
|
||||
const { subscription, errors$ } = setupScenario(10);
|
||||
errors$.next(SavedObjectsErrorHelpers.createTooManyRequestsError('a', 'b'));
|
||||
clock.tick(ADJUST_THROUGHPUT_INTERVAL * 10);
|
||||
expect(subscription).toHaveBeenNthCalledWith(1, 10);
|
||||
expect(subscription).toHaveBeenNthCalledWith(2, 8);
|
||||
expect(subscription).toHaveBeenNthCalledWith(3, 9);
|
||||
expect(subscription).toHaveBeenNthCalledWith(4, 10);
|
||||
// No new calls due to value not changing and usage of distinctUntilChanged()
|
||||
expect(subscription).toHaveBeenCalledTimes(4);
|
||||
});
|
||||
|
||||
test('should keep reducing configuration when errors keep emitting until it reaches minimum', async () => {
|
||||
const { subscription, errors$ } = setupScenario(10);
|
||||
for (let i = 0; i < 20; i++) {
|
||||
errors$.next(SavedObjectsErrorHelpers.createTooManyRequestsError('a', 'b'));
|
||||
clock.tick(ADJUST_THROUGHPUT_INTERVAL);
|
||||
}
|
||||
expect(subscription).toHaveBeenNthCalledWith(1, 10);
|
||||
expect(subscription).toHaveBeenNthCalledWith(2, 8);
|
||||
expect(subscription).toHaveBeenNthCalledWith(3, 6);
|
||||
expect(subscription).toHaveBeenNthCalledWith(4, 4);
|
||||
expect(subscription).toHaveBeenNthCalledWith(5, 3);
|
||||
expect(subscription).toHaveBeenNthCalledWith(6, 2);
|
||||
expect(subscription).toHaveBeenNthCalledWith(7, 1);
|
||||
// No new calls due to value not changing and usage of distinctUntilChanged()
|
||||
expect(subscription).toHaveBeenCalledTimes(7);
|
||||
});
|
||||
test('should decrease configuration at the next interval when an error is emitted', async () => {
|
||||
const { subscription, errors$ } = setupScenario(100);
|
||||
errors$.next(SavedObjectsErrorHelpers.createTooManyRequestsError('a', 'b'));
|
||||
clock.tick(ADJUST_THROUGHPUT_INTERVAL - 1);
|
||||
expect(subscription).toHaveBeenCalledTimes(1);
|
||||
clock.tick(1);
|
||||
expect(subscription).toHaveBeenCalledTimes(2);
|
||||
expect(subscription).toHaveBeenNthCalledWith(2, 80);
|
||||
});
|
||||
|
||||
describe('mget claim strategy', () => {
|
||||
test('should decrease configuration at the next interval when an error is emitted', async () => {
|
||||
const { subscription, errors$ } = setupScenario(10, CLAIM_STRATEGY_MGET);
|
||||
errors$.next(SavedObjectsErrorHelpers.createTooManyRequestsError('a', 'b'));
|
||||
clock.tick(ADJUST_THROUGHPUT_INTERVAL - 1);
|
||||
expect(subscription).toHaveBeenCalledTimes(1);
|
||||
expect(subscription).toHaveBeenNthCalledWith(1, 10);
|
||||
clock.tick(1);
|
||||
expect(subscription).toHaveBeenCalledTimes(2);
|
||||
expect(subscription).toHaveBeenNthCalledWith(2, 8);
|
||||
});
|
||||
test('should log a warning when the configuration changes from the starting value', async () => {
|
||||
const { errors$ } = setupScenario(100);
|
||||
errors$.next(SavedObjectsErrorHelpers.createTooManyRequestsError('a', 'b'));
|
||||
clock.tick(ADJUST_THROUGHPUT_INTERVAL);
|
||||
expect(logger.warn).toHaveBeenCalledWith(
|
||||
'Max workers configuration is temporarily reduced after Elasticsearch returned 1 "too many request" and/or "execute [inline] script" error(s).'
|
||||
);
|
||||
});
|
||||
|
||||
test('should log a warning when the configuration changes from the starting value', async () => {
|
||||
const { errors$ } = setupScenario(10, CLAIM_STRATEGY_MGET);
|
||||
test('should increase configuration back to normal incrementally after an error is emitted', async () => {
|
||||
const { subscription, errors$ } = setupScenario(100);
|
||||
errors$.next(SavedObjectsErrorHelpers.createTooManyRequestsError('a', 'b'));
|
||||
clock.tick(ADJUST_THROUGHPUT_INTERVAL * 10);
|
||||
expect(subscription).toHaveBeenNthCalledWith(2, 80);
|
||||
expect(subscription).toHaveBeenNthCalledWith(3, 84);
|
||||
// 88.2 -> 89 from Math.ceil
|
||||
expect(subscription).toHaveBeenNthCalledWith(4, 89);
|
||||
expect(subscription).toHaveBeenNthCalledWith(5, 94);
|
||||
expect(subscription).toHaveBeenNthCalledWith(6, 99);
|
||||
// 103.95 -> 100 from Math.min with starting value
|
||||
expect(subscription).toHaveBeenNthCalledWith(7, 100);
|
||||
// No new calls due to value not changing and usage of distinctUntilChanged()
|
||||
expect(subscription).toHaveBeenCalledTimes(7);
|
||||
});
|
||||
|
||||
test('should keep reducing configuration when errors keep emitting', async () => {
|
||||
const { subscription, errors$ } = setupScenario(100);
|
||||
for (let i = 0; i < 20; i++) {
|
||||
errors$.next(SavedObjectsErrorHelpers.createTooManyRequestsError('a', 'b'));
|
||||
clock.tick(ADJUST_THROUGHPUT_INTERVAL);
|
||||
expect(logger.warn).toHaveBeenCalledWith(
|
||||
'Capacity configuration is temporarily reduced after Elasticsearch returned 1 "too many request" and/or "execute [inline] script" error(s).'
|
||||
);
|
||||
});
|
||||
|
||||
test('should increase configuration back to normal incrementally after an error is emitted', async () => {
|
||||
const { subscription, errors$ } = setupScenario(10, CLAIM_STRATEGY_MGET);
|
||||
errors$.next(SavedObjectsErrorHelpers.createTooManyRequestsError('a', 'b'));
|
||||
clock.tick(ADJUST_THROUGHPUT_INTERVAL * 10);
|
||||
expect(subscription).toHaveBeenNthCalledWith(1, 10);
|
||||
expect(subscription).toHaveBeenNthCalledWith(2, 8);
|
||||
expect(subscription).toHaveBeenNthCalledWith(3, 9);
|
||||
expect(subscription).toHaveBeenNthCalledWith(4, 10);
|
||||
// No new calls due to value not changing and usage of distinctUntilChanged()
|
||||
expect(subscription).toHaveBeenCalledTimes(4);
|
||||
});
|
||||
|
||||
test('should keep reducing configuration when errors keep emitting until it reaches minimum', async () => {
|
||||
const { subscription, errors$ } = setupScenario(10, CLAIM_STRATEGY_MGET);
|
||||
for (let i = 0; i < 20; i++) {
|
||||
errors$.next(SavedObjectsErrorHelpers.createTooManyRequestsError('a', 'b'));
|
||||
clock.tick(ADJUST_THROUGHPUT_INTERVAL);
|
||||
}
|
||||
expect(subscription).toHaveBeenNthCalledWith(1, 10);
|
||||
expect(subscription).toHaveBeenNthCalledWith(2, 8);
|
||||
expect(subscription).toHaveBeenNthCalledWith(3, 6);
|
||||
expect(subscription).toHaveBeenNthCalledWith(4, 5);
|
||||
// No new calls due to value not changing and usage of distinctUntilChanged()
|
||||
expect(subscription).toHaveBeenCalledTimes(4);
|
||||
});
|
||||
}
|
||||
expect(subscription).toHaveBeenNthCalledWith(2, 80);
|
||||
expect(subscription).toHaveBeenNthCalledWith(3, 64);
|
||||
// 51.2 -> 51 from Math.floor
|
||||
expect(subscription).toHaveBeenNthCalledWith(4, 51);
|
||||
expect(subscription).toHaveBeenNthCalledWith(5, 40);
|
||||
expect(subscription).toHaveBeenNthCalledWith(6, 32);
|
||||
expect(subscription).toHaveBeenNthCalledWith(7, 25);
|
||||
expect(subscription).toHaveBeenNthCalledWith(8, 20);
|
||||
expect(subscription).toHaveBeenNthCalledWith(9, 16);
|
||||
expect(subscription).toHaveBeenNthCalledWith(10, 12);
|
||||
expect(subscription).toHaveBeenNthCalledWith(11, 9);
|
||||
expect(subscription).toHaveBeenNthCalledWith(12, 7);
|
||||
expect(subscription).toHaveBeenNthCalledWith(13, 5);
|
||||
expect(subscription).toHaveBeenNthCalledWith(14, 4);
|
||||
expect(subscription).toHaveBeenNthCalledWith(15, 3);
|
||||
expect(subscription).toHaveBeenNthCalledWith(16, 2);
|
||||
expect(subscription).toHaveBeenNthCalledWith(17, 1);
|
||||
// No new calls due to value not changing and usage of distinctUntilChanged()
|
||||
expect(subscription).toHaveBeenCalledTimes(17);
|
||||
});
|
||||
});
|
||||
|
||||
|
@@ -279,10 +151,8 @@ describe('createManagedConfiguration()', () => {
|
|||
const { pollIntervalConfiguration$ } = createManagedConfiguration({
|
||||
logger,
|
||||
errors$,
|
||||
config: {
|
||||
poll_interval: startingPollInterval,
|
||||
capacity: 20,
|
||||
} as TaskManagerConfig,
|
||||
startingPollInterval,
|
||||
startingMaxWorkers: 1,
|
||||
});
|
||||
pollIntervalConfiguration$.subscribe(subscription);
|
||||
return { subscription, errors$ };
|
||||
|
|
|
@@ -10,26 +10,17 @@ import { filter, mergeScan, map, scan, distinctUntilChanged, startWith } from 'r
import { SavedObjectsErrorHelpers } from '@kbn/core/server';
import { Logger } from '@kbn/core/server';
import { isEsCannotExecuteScriptError } from './identify_es_error';
import { CLAIM_STRATEGY_MGET, DEFAULT_CAPACITY, MAX_CAPACITY, TaskManagerConfig } from '../config';
import { TaskCost } from '../task';

const FLUSH_MARKER = Symbol('flush');
export const ADJUST_THROUGHPUT_INTERVAL = 10 * 1000;
export const PREFERRED_MAX_POLL_INTERVAL = 60 * 1000;

// Capacity is measured in number of normal cost tasks that can be run
// At a minimum, we need to be able to run a single task with the greatest cost
// so we should convert the greatest cost to normal cost
export const MIN_COST = TaskCost.ExtraLarge / TaskCost.Normal;

// For default claim strategy
export const MIN_WORKERS = 1;

// When errors occur, reduce capacity by CAPACITY_DECREASE_PERCENTAGE
// When errors no longer occur, start increasing capacity by CAPACITY_INCREASE_PERCENTAGE
// When errors occur, reduce maxWorkers by MAX_WORKERS_DECREASE_PERCENTAGE
// When errors no longer occur, start increasing maxWorkers by MAX_WORKERS_INCREASE_PERCENTAGE
// until starting value is reached
const CAPACITY_DECREASE_PERCENTAGE = 0.8;
const CAPACITY_INCREASE_PERCENTAGE = 1.05;
const MAX_WORKERS_DECREASE_PERCENTAGE = 0.8;
const MAX_WORKERS_INCREASE_PERCENTAGE = 1.05;

// When errors occur, increase pollInterval by POLL_INTERVAL_INCREASE_PERCENTAGE
// When errors no longer occur, start decreasing pollInterval by POLL_INTERVAL_DECREASE_PERCENTAGE
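// Illustrative sketch (not part of this diff): how the error-driven backoff defined by the
// constants above plays out for the mget claim strategy, assuming TaskCost.Normal = 2 and
// TaskCost.ExtraLarge = 10, so MIN_COST = 5. Starting from a capacity of 10, each
// ADJUST_THROUGHPUT_INTERVAL with errors multiplies by CAPACITY_DECREASE_PERCENTAGE (floored)
// and clamps at the minimum, which reproduces the 10 -> 8 -> 6 -> 5 sequence asserted in the
// test earlier in this commit.
const ILLUSTRATIVE_MIN_COST = 10 / 2; // assumed TaskCost.ExtraLarge / TaskCost.Normal
function nextCapacityOnError(previousCapacity: number): number {
  return Math.max(Math.floor(previousCapacity * 0.8), ILLUSTRATIVE_MIN_COST);
}
function nextCapacityOnRecovery(previousCapacity: number, startingCapacity: number): number {
  return Math.min(startingCapacity, Math.ceil(previousCapacity * 1.05));
}
// nextCapacityOnError(10) === 8, nextCapacityOnError(8) === 6, nextCapacityOnError(6) === 5,
// and further errors keep it pinned at 5; once errors stop, nextCapacityOnRecovery creeps it
// back toward the starting value by ~5% per interval.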
@@ -38,32 +29,28 @@ const POLL_INTERVAL_DECREASE_PERCENTAGE = 0.95;
const POLL_INTERVAL_INCREASE_PERCENTAGE = 1.2;

interface ManagedConfigurationOpts {
  config: TaskManagerConfig;
  defaultCapacity?: number;
  errors$: Observable<Error>;
  logger: Logger;
  startingMaxWorkers: number;
  startingPollInterval: number;
  errors$: Observable<Error>;
}

export interface ManagedConfiguration {
  startingCapacity: number;
  capacityConfiguration$: Observable<number>;
  maxWorkersConfiguration$: Observable<number>;
  pollIntervalConfiguration$: Observable<number>;
}

export function createManagedConfiguration({
  config,
  defaultCapacity = DEFAULT_CAPACITY,
  logger,
  startingMaxWorkers,
  startingPollInterval,
  errors$,
}: ManagedConfigurationOpts): ManagedConfiguration {
  const errorCheck$ = countErrors(errors$, ADJUST_THROUGHPUT_INTERVAL);
  const startingCapacity = calculateStartingCapacity(config, logger, defaultCapacity);
  const startingPollInterval = config.poll_interval;
  return {
    startingCapacity,
    capacityConfiguration$: errorCheck$.pipe(
      createCapacityScan(config, logger, startingCapacity),
      startWith(startingCapacity),
    maxWorkersConfiguration$: errorCheck$.pipe(
      createMaxWorkersScan(logger, startingMaxWorkers),
      startWith(startingMaxWorkers),
      distinctUntilChanged()
    ),
    pollIntervalConfiguration$: errorCheck$.pipe(
@@ -74,39 +61,37 @@ export function createManagedConfiguration({
  };
}

function createCapacityScan(config: TaskManagerConfig, logger: Logger, startingCapacity: number) {
  return scan((previousCapacity: number, errorCount: number) => {
    let newCapacity: number;
function createMaxWorkersScan(logger: Logger, startingMaxWorkers: number) {
  return scan((previousMaxWorkers: number, errorCount: number) => {
    let newMaxWorkers: number;
    if (errorCount > 0) {
      const minCapacity = getMinCapacity(config);
      // Decrease capacity by CAPACITY_DECREASE_PERCENTAGE while making sure it doesn't go lower than minCapacity.
      // Decrease max workers by MAX_WORKERS_DECREASE_PERCENTAGE while making sure it doesn't go lower than 1.
      // Using Math.floor to make sure the number is different than previous while not being a decimal value.
      newCapacity = Math.max(
        Math.floor(previousCapacity * CAPACITY_DECREASE_PERCENTAGE),
        minCapacity
      newMaxWorkers = Math.max(
        Math.floor(previousMaxWorkers * MAX_WORKERS_DECREASE_PERCENTAGE),
        MIN_WORKERS
      );
    } else {
      // Increase capacity by CAPACITY_INCREASE_PERCENTAGE while making sure it doesn't go
      // Increase max workers by MAX_WORKERS_INCREASE_PERCENTAGE while making sure it doesn't go
      // higher than the starting value. Using Math.ceil to make sure the number is different than
      // previous while not being a decimal value
      newCapacity = Math.min(
        startingCapacity,
        Math.ceil(previousCapacity * CAPACITY_INCREASE_PERCENTAGE)
      newMaxWorkers = Math.min(
        startingMaxWorkers,
        Math.ceil(previousMaxWorkers * MAX_WORKERS_INCREASE_PERCENTAGE)
      );
    }

    if (newCapacity !== previousCapacity) {
    if (newMaxWorkers !== previousMaxWorkers) {
      logger.debug(
        `Capacity configuration changing from ${previousCapacity} to ${newCapacity} after seeing ${errorCount} "too many request" and/or "execute [inline] script" error(s)`
        `Max workers configuration changing from ${previousMaxWorkers} to ${newMaxWorkers} after seeing ${errorCount} "too many request" and/or "execute [inline] script" error(s)`
      );
      if (previousCapacity === startingCapacity) {
      if (previousMaxWorkers === startingMaxWorkers) {
        logger.warn(
          `Capacity configuration is temporarily reduced after Elasticsearch returned ${errorCount} "too many request" and/or "execute [inline] script" error(s).`
          `Max workers configuration is temporarily reduced after Elasticsearch returned ${errorCount} "too many request" and/or "execute [inline] script" error(s).`
        );
      }
    }
    return newCapacity;
  }, startingCapacity);
    return newMaxWorkers;
  }, startingMaxWorkers);
}

function createPollIntervalScan(logger: Logger, startingPollInterval: number) {
@@ -201,36 +186,3 @@ function resetErrorCount() {
    count: 0,
  };
}

function getMinCapacity(config: TaskManagerConfig) {
  switch (config.claim_strategy) {
    case CLAIM_STRATEGY_MGET:
      return MIN_COST;

    default:
      return MIN_WORKERS;
  }
}

export function calculateStartingCapacity(
  config: TaskManagerConfig,
  logger: Logger,
  defaultCapacity: number
): number {
  if (config.capacity !== undefined && config.max_workers !== undefined) {
    logger.warn(
      `Both "xpack.task_manager.capacity" and "xpack.task_manager.max_workers" configs are set, max_workers will be ignored in favor of capacity and the setting should be removed.`
    );
  }

  if (config.capacity) {
    // Use capacity if explicitly set
    return config.capacity!;
  } else if (config.max_workers) {
    // Otherwise use max_worker value as capacity, capped at MAX_CAPACITY
    return Math.min(config.max_workers, MAX_CAPACITY);
  }

  // Neither are set, use the given default capacity
  return defaultCapacity;
}

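// Illustrative sketch (not part of this diff): how the precedence in calculateStartingCapacity
// above resolves, assuming MAX_CAPACITY = 50 and a default capacity of 10 (both assumed values
// for the example).
//   { capacity: 20, max_workers: 100 }  -> 20  (capacity wins, a warning is logged)
//   { max_workers: 100 }                -> 50  (max_workers is capped at MAX_CAPACITY)
//   { max_workers: 30 }                 -> 30
//   {}                                  -> 10  (falls back to the provided default)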
@@ -30,6 +30,7 @@ describe('fillPool', () => {
        tasksUpdated: tasks?.length ?? 0,
        tasksConflicted: 0,
        tasksClaimed: 0,
        tasksRejected: 0,
      },
      docs: tasks,
    })

@ -1,185 +0,0 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import { CLAIM_STRATEGY_DEFAULT, CLAIM_STRATEGY_MGET, DEFAULT_CAPACITY } from '../config';
|
||||
import { getDefaultCapacity } from './get_default_capacity';
|
||||
|
||||
describe('getDefaultCapacity', () => {
|
||||
it('returns default capacity when not in cloud', () => {
|
||||
expect(
|
||||
getDefaultCapacity({
|
||||
heapSizeLimit: 851443712,
|
||||
isCloud: false,
|
||||
isServerless: false,
|
||||
isBackgroundTaskNodeOnly: false,
|
||||
claimStrategy: CLAIM_STRATEGY_MGET,
|
||||
})
|
||||
).toBe(DEFAULT_CAPACITY);
|
||||
|
||||
expect(
|
||||
getDefaultCapacity({
|
||||
heapSizeLimit: 851443712,
|
||||
isCloud: false,
|
||||
isServerless: true,
|
||||
isBackgroundTaskNodeOnly: false,
|
||||
claimStrategy: CLAIM_STRATEGY_MGET,
|
||||
})
|
||||
).toBe(DEFAULT_CAPACITY);
|
||||
|
||||
expect(
|
||||
getDefaultCapacity({
|
||||
heapSizeLimit: 851443712,
|
||||
isCloud: false,
|
||||
isServerless: false,
|
||||
isBackgroundTaskNodeOnly: true,
|
||||
claimStrategy: CLAIM_STRATEGY_MGET,
|
||||
})
|
||||
).toBe(DEFAULT_CAPACITY);
|
||||
|
||||
expect(
|
||||
getDefaultCapacity({
|
||||
heapSizeLimit: 851443712,
|
||||
isCloud: false,
|
||||
isServerless: true,
|
||||
isBackgroundTaskNodeOnly: true,
|
||||
claimStrategy: CLAIM_STRATEGY_MGET,
|
||||
})
|
||||
).toBe(DEFAULT_CAPACITY);
|
||||
});
|
||||
|
||||
it('returns default capacity when default claim strategy', () => {
|
||||
expect(
|
||||
getDefaultCapacity({
|
||||
heapSizeLimit: 851443712,
|
||||
isCloud: true,
|
||||
isServerless: false,
|
||||
isBackgroundTaskNodeOnly: false,
|
||||
claimStrategy: CLAIM_STRATEGY_DEFAULT,
|
||||
})
|
||||
).toBe(DEFAULT_CAPACITY);
|
||||
|
||||
expect(
|
||||
getDefaultCapacity({
|
||||
heapSizeLimit: 851443712,
|
||||
isCloud: true,
|
||||
isServerless: false,
|
||||
isBackgroundTaskNodeOnly: true,
|
||||
claimStrategy: CLAIM_STRATEGY_DEFAULT,
|
||||
})
|
||||
).toBe(DEFAULT_CAPACITY);
|
||||
});
|
||||
|
||||
it('returns default capacity when serverless', () => {
|
||||
expect(
|
||||
getDefaultCapacity({
|
||||
heapSizeLimit: 851443712,
|
||||
isCloud: false,
|
||||
isServerless: true,
|
||||
isBackgroundTaskNodeOnly: false,
|
||||
claimStrategy: CLAIM_STRATEGY_MGET,
|
||||
})
|
||||
).toBe(DEFAULT_CAPACITY);
|
||||
|
||||
expect(
|
||||
getDefaultCapacity({
|
||||
heapSizeLimit: 851443712,
|
||||
isCloud: false,
|
||||
isServerless: true,
|
||||
isBackgroundTaskNodeOnly: true,
|
||||
claimStrategy: CLAIM_STRATEGY_MGET,
|
||||
})
|
||||
).toBe(DEFAULT_CAPACITY);
|
||||
|
||||
expect(
|
||||
getDefaultCapacity({
|
||||
heapSizeLimit: 851443712,
|
||||
isCloud: true,
|
||||
isServerless: true,
|
||||
isBackgroundTaskNodeOnly: false,
|
||||
claimStrategy: CLAIM_STRATEGY_MGET,
|
||||
})
|
||||
).toBe(DEFAULT_CAPACITY);
|
||||
|
||||
expect(
|
||||
getDefaultCapacity({
|
||||
heapSizeLimit: 851443712,
|
||||
isCloud: true,
|
||||
isServerless: true,
|
||||
isBackgroundTaskNodeOnly: true,
|
||||
claimStrategy: CLAIM_STRATEGY_MGET,
|
||||
})
|
||||
).toBe(DEFAULT_CAPACITY);
|
||||
});
|
||||
|
||||
it('returns capacity as expected when in cloud and claim strategy is mget', () => {
|
||||
// 1GB
|
||||
expect(
|
||||
getDefaultCapacity({
|
||||
heapSizeLimit: 851443712,
|
||||
isCloud: true,
|
||||
isServerless: false,
|
||||
isBackgroundTaskNodeOnly: false,
|
||||
claimStrategy: CLAIM_STRATEGY_MGET,
|
||||
})
|
||||
).toBe(10);
|
||||
|
||||
// 1GB but somehow background task node only is true
|
||||
expect(
|
||||
getDefaultCapacity({
|
||||
heapSizeLimit: 851443712,
|
||||
isCloud: true,
|
||||
isServerless: false,
|
||||
isBackgroundTaskNodeOnly: true,
|
||||
claimStrategy: CLAIM_STRATEGY_MGET,
|
||||
})
|
||||
).toBe(10);
|
||||
|
||||
// 2GB
|
||||
expect(
|
||||
getDefaultCapacity({
|
||||
heapSizeLimit: 1702887424,
|
||||
isCloud: true,
|
||||
isServerless: false,
|
||||
isBackgroundTaskNodeOnly: false,
|
||||
claimStrategy: CLAIM_STRATEGY_MGET,
|
||||
})
|
||||
).toBe(15);
|
||||
|
||||
// 2GB but somehow background task node only is true
|
||||
expect(
|
||||
getDefaultCapacity({
|
||||
heapSizeLimit: 1702887424,
|
||||
isCloud: true,
|
||||
isServerless: false,
|
||||
isBackgroundTaskNodeOnly: true,
|
||||
claimStrategy: CLAIM_STRATEGY_MGET,
|
||||
})
|
||||
).toBe(15);
|
||||
|
||||
// 4GB
|
||||
expect(
|
||||
getDefaultCapacity({
|
||||
heapSizeLimit: 3405774848,
|
||||
isCloud: true,
|
||||
isServerless: false,
|
||||
isBackgroundTaskNodeOnly: false,
|
||||
claimStrategy: CLAIM_STRATEGY_MGET,
|
||||
})
|
||||
).toBe(25);
|
||||
|
||||
// 4GB background task only
|
||||
expect(
|
||||
getDefaultCapacity({
|
||||
heapSizeLimit: 3405774848,
|
||||
isCloud: true,
|
||||
isServerless: false,
|
||||
isBackgroundTaskNodeOnly: true,
|
||||
claimStrategy: CLAIM_STRATEGY_MGET,
|
||||
})
|
||||
).toBe(50);
|
||||
});
|
||||
});
|
|
@@ -1,51 +0,0 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { CLAIM_STRATEGY_MGET, DEFAULT_CAPACITY } from '../config';

interface GetDefaultCapacityOpts {
  claimStrategy?: string;
  heapSizeLimit: number;
  isCloud: boolean;
  isServerless: boolean;
  isBackgroundTaskNodeOnly: boolean;
}

// Map instance size to desired capacity
const HEAP_TO_CAPACITY_MAP = [
  { minHeap: 0, maxHeap: 1, capacity: 10 },
  { minHeap: 1, maxHeap: 2, capacity: 15 },
  { minHeap: 2, maxHeap: 4, capacity: 25, backgroundTaskNodeOnly: false },
  { minHeap: 2, maxHeap: 4, capacity: 50, backgroundTaskNodeOnly: true },
];

export function getDefaultCapacity({
  claimStrategy,
  heapSizeLimit: heapSizeLimitInBytes,
  isCloud,
  isServerless,
  isBackgroundTaskNodeOnly,
}: GetDefaultCapacityOpts) {
  // perform heap size based calculations only in cloud
  if (isCloud && !isServerless && claimStrategy === CLAIM_STRATEGY_MGET) {
    // convert bytes to GB
    const heapSizeLimitInGB = heapSizeLimitInBytes / 1e9;

    const config = HEAP_TO_CAPACITY_MAP.find((map) => {
      return (
        heapSizeLimitInGB > map.minHeap &&
        heapSizeLimitInGB <= map.maxHeap &&
        (map.backgroundTaskNodeOnly === undefined ||
          isBackgroundTaskNodeOnly === map.backgroundTaskNodeOnly)
      );
    });

    return config?.capacity ?? DEFAULT_CAPACITY;
  }

  return DEFAULT_CAPACITY;
}
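// Illustrative sketch (not part of this diff): the heap-to-capacity mapping above applied to
// the fixtures used by the deleted get_default_capacity tests earlier in this commit. The byte
// values come from those tests; the GB conversion and lookup mirror the function body.
//   getDefaultCapacity({ heapSizeLimit: 851443712,  isCloud: true, isServerless: false,
//     isBackgroundTaskNodeOnly: false, claimStrategy: CLAIM_STRATEGY_MGET })  // ~0.85 GB -> 10
//   getDefaultCapacity({ heapSizeLimit: 1702887424, isCloud: true, isServerless: false,
//     isBackgroundTaskNodeOnly: false, claimStrategy: CLAIM_STRATEGY_MGET })  // ~1.7 GB  -> 15
//   getDefaultCapacity({ heapSizeLimit: 3405774848, isCloud: true, isServerless: false,
//     isBackgroundTaskNodeOnly: true,  claimStrategy: CLAIM_STRATEGY_MGET })  // ~3.4 GB  -> 50
// Outside cloud, on serverless, or with the default claim strategy it returns DEFAULT_CAPACITY
// unchanged.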
@ -435,8 +435,7 @@ function getMockMonitoredHealth(overrides = {}): MonitoredHealth {
|
|||
timestamp: new Date().toISOString(),
|
||||
status: HealthStatus.OK,
|
||||
value: {
|
||||
capacity: { config: 10, as_cost: 20, as_workers: 10 },
|
||||
claim_strategy: 'default',
|
||||
max_workers: 10,
|
||||
poll_interval: 3000,
|
||||
request_capacity: 1000,
|
||||
monitored_aggregated_stats_refresh_rate: 5000,
|
||||
|
@ -455,19 +454,16 @@ function getMockMonitoredHealth(overrides = {}): MonitoredHealth {
|
|||
status: HealthStatus.OK,
|
||||
value: {
|
||||
count: 4,
|
||||
cost: 8,
|
||||
task_types: {
|
||||
actions_telemetry: { count: 2, cost: 4, status: { idle: 2 } },
|
||||
alerting_telemetry: { count: 1, cost: 2, status: { idle: 1 } },
|
||||
session_cleanup: { count: 1, cost: 2, status: { idle: 1 } },
|
||||
actions_telemetry: { count: 2, status: { idle: 2 } },
|
||||
alerting_telemetry: { count: 1, status: { idle: 1 } },
|
||||
session_cleanup: { count: 1, status: { idle: 1 } },
|
||||
},
|
||||
schedule: [],
|
||||
overdue: 0,
|
||||
overdue_cost: 0,
|
||||
overdue_non_recurring: 0,
|
||||
estimatedScheduleDensity: [],
|
||||
non_recurring: 20,
|
||||
non_recurring_cost: 40,
|
||||
owner_ids: 2,
|
||||
estimated_schedule_density: [],
|
||||
capacity_requirements: {
|
||||
|
|
|
@ -45,6 +45,7 @@ const config: TaskManagerConfig = {
|
|||
warn_threshold: 5000,
|
||||
},
|
||||
max_attempts: 9,
|
||||
max_workers: 10,
|
||||
metrics_reset_interval: 30000,
|
||||
monitored_aggregated_stats_refresh_rate: 5000,
|
||||
monitored_stats_health_verbose_log: {
|
||||
|
|
|
@ -21,7 +21,7 @@ import {
|
|||
} from '../task_events';
|
||||
import { MonitoredStat } from './monitoring_stats_stream';
|
||||
import { AggregatedStat, AggregatedStatProvider } from '../lib/runtime_statistics_aggregator';
|
||||
import { createRunningAveragedStat } from './task_run_calculators';
|
||||
import { createRunningAveragedStat } from './task_run_calcultors';
|
||||
import { DEFAULT_WORKER_UTILIZATION_RUNNING_AVERAGE_WINDOW } from '../config';
|
||||
|
||||
export interface PublicBackgroundTaskUtilizationStat extends JsonObject {
|
||||
|
|
|
@ -21,7 +21,7 @@ describe('estimateCapacity', () => {
|
|||
estimateCapacity(
|
||||
logger,
|
||||
mockStats(
|
||||
{ capacity: { config: 10, as_cost: 20, as_workers: 10 }, poll_interval: 3000 },
|
||||
{ max_workers: 10, poll_interval: 3000 },
|
||||
{
|
||||
owner_ids: 1,
|
||||
overdue_non_recurring: 0,
|
||||
|
@ -77,7 +77,7 @@ describe('estimateCapacity', () => {
|
|||
estimateCapacity(
|
||||
logger,
|
||||
mockStats(
|
||||
{ capacity: { config: 10, as_cost: 20, as_workers: 10 }, poll_interval: 3000 },
|
||||
{ max_workers: 10, poll_interval: 3000 },
|
||||
{
|
||||
owner_ids: 1,
|
||||
overdue_non_recurring: 0,
|
||||
|
@ -135,7 +135,7 @@ describe('estimateCapacity', () => {
|
|||
estimateCapacity(
|
||||
logger,
|
||||
mockStats(
|
||||
{ capacity: { config: 10, as_cost: 20, as_workers: 10 }, poll_interval: 3000 },
|
||||
{ max_workers: 10, poll_interval: 3000 },
|
||||
{
|
||||
owner_ids: 1,
|
||||
overdue_non_recurring: 0,
|
||||
|
@ -172,7 +172,7 @@ describe('estimateCapacity', () => {
|
|||
estimateCapacity(
|
||||
logger,
|
||||
mockStats(
|
||||
{ capacity: { config: 10, as_cost: 20, as_workers: 10 }, poll_interval: 3000 },
|
||||
{ max_workers: 10, poll_interval: 3000 },
|
||||
{
|
||||
owner_ids: 1,
|
||||
overdue_non_recurring: 0,
|
||||
|
@ -228,7 +228,7 @@ describe('estimateCapacity', () => {
|
|||
estimateCapacity(
|
||||
logger,
|
||||
mockStats(
|
||||
{ capacity: { config: 10, as_cost: 20, as_workers: 10 }, poll_interval: 3000 },
|
||||
{ max_workers: 10, poll_interval: 3000 },
|
||||
{
|
||||
// 0 active tasks at this moment in time, so no owners identifiable
|
||||
owner_ids: 0,
|
||||
|
@ -285,7 +285,7 @@ describe('estimateCapacity', () => {
|
|||
estimateCapacity(
|
||||
logger,
|
||||
mockStats(
|
||||
{ capacity: { config: 10, as_cost: 20, as_workers: 10 }, poll_interval: 3000 },
|
||||
{ max_workers: 10, poll_interval: 3000 },
|
||||
{
|
||||
owner_ids: 3,
|
||||
overdue_non_recurring: 0,
|
||||
|
@ -347,7 +347,7 @@ describe('estimateCapacity', () => {
|
|||
estimateCapacity(
|
||||
logger,
|
||||
mockStats(
|
||||
{ capacity: { config: 10, as_cost: 20, as_workers: 10 }, poll_interval: 3000 },
|
||||
{ max_workers: 10, poll_interval: 3000 },
|
||||
{
|
||||
owner_ids: provisionedKibanaInstances,
|
||||
overdue_non_recurring: 0,
|
||||
|
@ -428,7 +428,7 @@ describe('estimateCapacity', () => {
|
|||
estimateCapacity(
|
||||
logger,
|
||||
mockStats(
|
||||
{ capacity: { config: 10, as_cost: 20, as_workers: 10 }, poll_interval: 3000 },
|
||||
{ max_workers: 10, poll_interval: 3000 },
|
||||
{
|
||||
owner_ids: provisionedKibanaInstances,
|
||||
overdue_non_recurring: 0,
|
||||
|
@ -510,7 +510,7 @@ describe('estimateCapacity', () => {
|
|||
estimateCapacity(
|
||||
logger,
|
||||
mockStats(
|
||||
{ capacity: { config: 10, as_cost: 20, as_workers: 10 }, poll_interval: 3000 },
|
||||
{ max_workers: 10, poll_interval: 3000 },
|
||||
{
|
||||
owner_ids: 1,
|
||||
overdue_non_recurring: 0,
|
||||
|
@ -578,7 +578,7 @@ describe('estimateCapacity', () => {
|
|||
estimateCapacity(
|
||||
logger,
|
||||
mockStats(
|
||||
{ capacity: { config: 10, as_cost: 20, as_workers: 10 }, poll_interval: 3000 },
|
||||
{ max_workers: 10, poll_interval: 3000 },
|
||||
{
|
||||
owner_ids: 1,
|
||||
overdue_non_recurring: 0,
|
||||
|
@ -643,7 +643,7 @@ describe('estimateCapacity', () => {
|
|||
estimateCapacity(
|
||||
logger,
|
||||
mockStats(
|
||||
{ capacity: { config: 10, as_cost: 20, as_workers: 10 }, poll_interval: 3000 },
|
||||
{ max_workers: 10, poll_interval: 3000 },
|
||||
{
|
||||
owner_ids: 1,
|
||||
overdue_non_recurring: 0,
|
||||
|
@ -708,7 +708,7 @@ describe('estimateCapacity', () => {
|
|||
estimateCapacity(
|
||||
logger,
|
||||
mockStats(
|
||||
{ capacity: { config: 10, as_cost: 20, as_workers: 10 }, poll_interval: 3000 },
|
||||
{ max_workers: 10, poll_interval: 3000 },
|
||||
{
|
||||
owner_ids: 1,
|
||||
overdue_non_recurring: 0,
|
||||
|
@ -784,7 +784,7 @@ describe('estimateCapacity', () => {
|
|||
estimateCapacity(
|
||||
logger,
|
||||
mockStats(
|
||||
{ capacity: { config: 10, as_cost: 20, as_workers: 10 }, poll_interval: 3000 },
|
||||
{ max_workers: 10, poll_interval: 3000 },
|
||||
{
|
||||
owner_ids: 1,
|
||||
overdue_non_recurring: 0,
|
||||
|
@ -862,7 +862,7 @@ describe('estimateCapacity', () => {
|
|||
estimateCapacity(
|
||||
logger,
|
||||
mockStats(
|
||||
{ capacity: { config: 10, as_cost: 20, as_workers: 10 }, poll_interval: 3000 },
|
||||
{ max_workers: 10, poll_interval: 3000 },
|
||||
{
|
||||
overdue: undefined,
|
||||
owner_ids: 1,
|
||||
|
@ -949,8 +949,7 @@ function mockStats(
|
|||
status: HealthStatus.OK,
|
||||
timestamp: new Date().toISOString(),
|
||||
value: {
|
||||
capacity: { config: 10, as_cost: 20, as_workers: 10 },
|
||||
claim_strategy: 'default',
|
||||
max_workers: 0,
|
||||
poll_interval: 0,
|
||||
request_capacity: 1000,
|
||||
monitored_aggregated_stats_refresh_rate: 5000,
|
||||
|
@ -970,19 +969,16 @@ function mockStats(
|
|||
timestamp: new Date().toISOString(),
|
||||
value: {
|
||||
count: 4,
|
||||
cost: 8,
|
||||
task_types: {
|
||||
actions_telemetry: { count: 2, cost: 4, status: { idle: 2 } },
|
||||
alerting_telemetry: { count: 1, cost: 2, status: { idle: 1 } },
|
||||
session_cleanup: { count: 1, cost: 2, status: { idle: 1 } },
|
||||
actions_telemetry: { count: 2, status: { idle: 2 } },
|
||||
alerting_telemetry: { count: 1, status: { idle: 1 } },
|
||||
session_cleanup: { count: 1, status: { idle: 1 } },
|
||||
},
|
||||
schedule: [],
|
||||
overdue: 0,
|
||||
overdue_cost: 0,
|
||||
overdue_non_recurring: 0,
|
||||
estimated_schedule_density: [],
|
||||
non_recurring: 20,
|
||||
non_recurring_cost: 40,
|
||||
owner_ids: 2,
|
||||
capacity_requirements: {
|
||||
per_minute: 150,
|
||||
|
|
|
@@ -10,7 +10,7 @@ import stats from 'stats-lite';
import { JsonObject } from '@kbn/utility-types';
import { Logger } from '@kbn/core/server';
import { RawMonitoringStats, RawMonitoredStat, HealthStatus } from './monitoring_stats_stream';
import { AveragedStat } from './task_run_calculators';
import { AveragedStat } from './task_run_calcultors';
import { TaskPersistenceTypes } from './task_run_statistics';
import { asErr, asOk, map, Result } from '../lib/result_type';

@@ -61,10 +61,8 @@ export function estimateCapacity(
    non_recurring: percentageOfExecutionsUsedByNonRecurringTasks,
  } = capacityStats.runtime.value.execution.persistence;
  const { overdue, capacity_requirements: capacityRequirements } = workload;
  const {
    poll_interval: pollInterval,
    capacity: { config: configuredCapacity },
  } = capacityStats.configuration.value;
  const { poll_interval: pollInterval, max_workers: maxWorkers } =
    capacityStats.configuration.value;

  /**
   * On average, how many polling cycles does it take to execute a task?

@@ -80,10 +78,10 @@
  );

  /**
   * Given the current configuration how much capacity do we have to run normal cost tasks?
   * Given the current configuration how much task capacity do we have?
   */
  const capacityPerMinutePerKibana = Math.round(
    ((60 * 1000) / (averagePollIntervalsPerExecution * pollInterval)) * configuredCapacity
    ((60 * 1000) / (averagePollIntervalsPerExecution * pollInterval)) * maxWorkers
  );
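  // Illustrative sketch (not part of this diff): plugging representative numbers into the
  // capacityPerMinutePerKibana formula above. With a 3000ms poll interval, a configured
  // capacity of 10, and tasks taking on average 1.5 polling cycles to execute (all assumed
  // values), a single Kibana can execute roughly:
  //   Math.round(((60 * 1000) / (1.5 * 3000)) * 10) === 133 normal cost tasks per minute.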
|
||||
|
||||
/**
|
||||
|
|
|
@ -13,6 +13,7 @@ import { TaskManagerConfig } from '../config';
|
|||
describe('Configuration Statistics Aggregator', () => {
|
||||
test('merges the static config with the merged configs', async () => {
|
||||
const configuration: TaskManagerConfig = {
|
||||
max_workers: 10,
|
||||
max_attempts: 9,
|
||||
poll_interval: 6000000,
|
||||
allow_reading_invalid_state: false,
|
||||
|
@ -54,8 +55,7 @@ describe('Configuration Statistics Aggregator', () => {
|
|||
};
|
||||
|
||||
const managedConfig = {
|
||||
startingCapacity: 10,
|
||||
capacityConfiguration$: new Subject<number>(),
|
||||
maxWorkersConfiguration$: new Subject<number>(),
|
||||
pollIntervalConfiguration$: new Subject<number>(),
|
||||
};
|
||||
|
||||
|
@ -65,12 +65,7 @@ describe('Configuration Statistics Aggregator', () => {
|
|||
.pipe(take(3), bufferCount(3))
|
||||
.subscribe(([initial, updatedWorkers, updatedInterval]) => {
|
||||
expect(initial.value).toEqual({
|
||||
capacity: {
|
||||
config: 10,
|
||||
as_workers: 10,
|
||||
as_cost: 20,
|
||||
},
|
||||
claim_strategy: 'default',
|
||||
max_workers: 10,
|
||||
poll_interval: 6000000,
|
||||
request_capacity: 1000,
|
||||
monitored_aggregated_stats_refresh_rate: 5000,
|
||||
|
@ -84,12 +79,7 @@ describe('Configuration Statistics Aggregator', () => {
|
|||
},
|
||||
});
|
||||
expect(updatedWorkers.value).toEqual({
|
||||
capacity: {
|
||||
config: 8,
|
||||
as_workers: 8,
|
||||
as_cost: 16,
|
||||
},
|
||||
claim_strategy: 'default',
|
||||
max_workers: 8,
|
||||
poll_interval: 6000000,
|
||||
request_capacity: 1000,
|
||||
monitored_aggregated_stats_refresh_rate: 5000,
|
||||
|
@ -103,12 +93,7 @@ describe('Configuration Statistics Aggregator', () => {
|
|||
},
|
||||
});
|
||||
expect(updatedInterval.value).toEqual({
|
||||
capacity: {
|
||||
config: 8,
|
||||
as_workers: 8,
|
||||
as_cost: 16,
|
||||
},
|
||||
claim_strategy: 'default',
|
||||
max_workers: 8,
|
||||
poll_interval: 3000,
|
||||
request_capacity: 1000,
|
||||
monitored_aggregated_stats_refresh_rate: 5000,
|
||||
|
@ -123,7 +108,7 @@ describe('Configuration Statistics Aggregator', () => {
|
|||
});
|
||||
resolve();
|
||||
}, reject);
|
||||
managedConfig.capacityConfiguration$.next(8);
|
||||
managedConfig.maxWorkersConfiguration$.next(8);
|
||||
managedConfig.pollIntervalConfiguration$.next(3000);
|
||||
} catch (error) {
|
||||
reject(error);
|
||||
|
|
|
@@ -8,11 +8,9 @@
import { combineLatest, of } from 'rxjs';
import { pick, merge } from 'lodash';
import { map, startWith } from 'rxjs';
import { JsonObject } from '@kbn/utility-types';
import { AggregatedStatProvider } from '../lib/runtime_statistics_aggregator';
import { CLAIM_STRATEGY_DEFAULT, TaskManagerConfig } from '../config';
import { TaskManagerConfig } from '../config';
import { ManagedConfiguration } from '../lib/create_managed_configuration';
import { getCapacityInCost, getCapacityInWorkers } from '../task_pool';

const CONFIG_FIELDS_TO_EXPOSE = [
  'request_capacity',

@@ -21,19 +19,10 @@ const CONFIG_FIELDS_TO_EXPOSE = [
  'monitored_task_execution_thresholds',
] as const;

interface CapacityConfig extends JsonObject {
  capacity: {
    config: number;
    as_workers: number;
    as_cost: number;
  };
}

export type ConfigStat = Pick<
  TaskManagerConfig,
  'poll_interval' | 'claim_strategy' | (typeof CONFIG_FIELDS_TO_EXPOSE)[number]
> &
  CapacityConfig;
  'max_workers' | 'poll_interval' | (typeof CONFIG_FIELDS_TO_EXPOSE)[number]
>;

export function createConfigurationAggregator(
  config: TaskManagerConfig,

@@ -41,21 +30,16 @@ export function createConfigurationAggregator(
): AggregatedStatProvider<ConfigStat> {
  return combineLatest([
    of(pick(config, ...CONFIG_FIELDS_TO_EXPOSE)),
    of({ claim_strategy: config.claim_strategy ?? CLAIM_STRATEGY_DEFAULT }),
    managedConfig.pollIntervalConfiguration$.pipe(
      startWith(config.poll_interval),
      map<number, Pick<TaskManagerConfig, 'poll_interval'>>((pollInterval) => ({
        poll_interval: pollInterval,
      }))
    ),
    managedConfig.capacityConfiguration$.pipe(
      startWith(managedConfig.startingCapacity),
      map<number, CapacityConfig>((capacity) => ({
        capacity: {
          config: capacity,
          as_workers: getCapacityInWorkers(capacity),
          as_cost: getCapacityInCost(capacity),
        },
    managedConfig.maxWorkersConfiguration$.pipe(
      startWith(config.max_workers),
      map<number, Pick<TaskManagerConfig, 'max_workers'>>((maxWorkers) => ({
        max_workers: maxWorkers,
      }))
    ),
  ]).pipe(
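// Illustrative sketch (not part of this diff): the shape the removed capacity aggregator
// emitted into the health/configuration stats, matching the expectations in the configuration
// statistics tests elsewhere in this commit. Assuming a normal task cost of 2, a configured
// capacity of 10 is exposed both as workers and as total cost:
//   { capacity: { config: 10, as_workers: 10, as_cost: 20 }, claim_strategy: 'default', ... }
// i.e. getCapacityInWorkers(10) -> 10 and getCapacityInCost(10) -> 20 under that assumption.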
@ -176,11 +176,11 @@ describe('Ephemeral Task Statistics', () => {
|
|||
});
|
||||
|
||||
const runningAverageWindowSize = 5;
|
||||
const capacity = 10;
|
||||
const maxWorkers = 10;
|
||||
const ephemeralTaskAggregator = createEphemeralTaskAggregator(
|
||||
ephemeralTaskLifecycle,
|
||||
runningAverageWindowSize,
|
||||
capacity
|
||||
maxWorkers
|
||||
);
|
||||
|
||||
function expectWindowEqualsUpdate(
|
||||
|
@ -229,7 +229,7 @@ describe('Ephemeral Task Statistics', () => {
|
|||
});
|
||||
});
|
||||
|
||||
test('returns the average load added per polling cycle cycle by ephemeral tasks when load exceeds capacity', async () => {
|
||||
test('returns the average load added per polling cycle cycle by ephemeral tasks when load exceeds max workers', async () => {
|
||||
const tasksExecuted = [0, 5, 10, 20, 15, 10, 5, 0, 0, 0, 0, 0];
|
||||
const expectedLoad = [0, 50, 100, 200, 150, 100, 50, 0, 0, 0, 0, 0];
|
||||
|
||||
|
@ -241,11 +241,11 @@ test('returns the average load added per polling cycle cycle by ephemeral tasks
|
|||
});
|
||||
|
||||
const runningAverageWindowSize = 5;
|
||||
const capacity = 10;
|
||||
const maxWorkers = 10;
|
||||
const ephemeralTaskAggregator = createEphemeralTaskAggregator(
|
||||
ephemeralTaskLifecycle,
|
||||
runningAverageWindowSize,
|
||||
capacity
|
||||
maxWorkers
|
||||
);
|
||||
|
||||
function expectWindowEqualsUpdate(
|
||||
|
|
|
@ -17,7 +17,7 @@ import {
|
|||
AveragedStat,
|
||||
calculateRunningAverage,
|
||||
createRunningAveragedStat,
|
||||
} from './task_run_calculators';
|
||||
} from './task_run_calcultors';
|
||||
import { HealthStatus } from './monitoring_stats_stream';
|
||||
|
||||
export interface EphemeralTaskStat extends JsonObject {
|
||||
|
@ -35,7 +35,7 @@ export interface SummarizedEphemeralTaskStat extends JsonObject {
|
|||
export function createEphemeralTaskAggregator(
|
||||
ephemeralTaskLifecycle: EphemeralTaskLifecycle,
|
||||
runningAverageWindowSize: number,
|
||||
capacity: number
|
||||
maxWorkers: number
|
||||
): AggregatedStatProvider<EphemeralTaskStat> {
|
||||
const ephemeralTaskRunEvents$ = ephemeralTaskLifecycle.events.pipe(
|
||||
filter((taskEvent: TaskLifecycleEvent) => isTaskRunEvent(taskEvent))
|
||||
|
@ -70,7 +70,7 @@ export function createEphemeralTaskAggregator(
|
|||
map(([tasksRanSincePreviousQueueSize, ephemeralQueueSize]) => ({
|
||||
queuedTasks: ephemeralQueuedTasksQueue(ephemeralQueueSize),
|
||||
executionsPerCycle: ephemeralQueueExecutionsPerCycleQueue(tasksRanSincePreviousQueueSize),
|
||||
load: ephemeralTaskLoadQueue(calculateWorkerLoad(capacity, tasksRanSincePreviousQueueSize)),
|
||||
load: ephemeralTaskLoadQueue(calculateWorkerLoad(maxWorkers, tasksRanSincePreviousQueueSize)),
|
||||
})),
|
||||
startWith({
|
||||
queuedTasks: [],
|
||||
|
|
|
@ -18,7 +18,6 @@ import { TaskPollingLifecycle } from '../polling_lifecycle';
|
|||
import { ManagedConfiguration } from '../lib/create_managed_configuration';
|
||||
import { EphemeralTaskLifecycle } from '../ephemeral_task_lifecycle';
|
||||
import { AdHocTaskCounter } from '../lib/adhoc_task_counter';
|
||||
import { TaskTypeDictionary } from '../task_type_dictionary';
|
||||
|
||||
export type { MonitoringStats, RawMonitoringStats } from './monitoring_stats_stream';
|
||||
export {
|
||||
|
@ -28,20 +27,27 @@ export {
|
|||
createMonitoringStatsStream,
|
||||
} from './monitoring_stats_stream';
|
||||
|
||||
export interface CreateMonitoringStatsOpts {
|
||||
taskStore: TaskStore;
|
||||
elasticsearchAndSOAvailability$: Observable<boolean>;
|
||||
config: TaskManagerConfig;
|
||||
managedConfig: ManagedConfiguration;
|
||||
logger: Logger;
|
||||
adHocTaskCounter: AdHocTaskCounter;
|
||||
taskDefinitions: TaskTypeDictionary;
|
||||
taskPollingLifecycle?: TaskPollingLifecycle;
|
||||
ephemeralTaskLifecycle?: EphemeralTaskLifecycle;
|
||||
}
|
||||
|
||||
export function createMonitoringStats(
|
||||
opts: CreateMonitoringStatsOpts
|
||||
taskStore: TaskStore,
|
||||
elasticsearchAndSOAvailability$: Observable<boolean>,
|
||||
config: TaskManagerConfig,
|
||||
managedConfig: ManagedConfiguration,
|
||||
logger: Logger,
|
||||
adHocTaskCounter: AdHocTaskCounter,
|
||||
taskPollingLifecycle?: TaskPollingLifecycle,
|
||||
ephemeralTaskLifecycle?: EphemeralTaskLifecycle
|
||||
): Observable<MonitoringStats> {
|
||||
return createMonitoringStatsStream(createAggregators(opts));
|
||||
return createMonitoringStatsStream(
|
||||
createAggregators(
|
||||
taskStore,
|
||||
elasticsearchAndSOAvailability$,
|
||||
config,
|
||||
managedConfig,
|
||||
logger,
|
||||
adHocTaskCounter,
|
||||
taskPollingLifecycle,
|
||||
ephemeralTaskLifecycle
|
||||
),
|
||||
config
|
||||
);
|
||||
}
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
* 2.0.
|
||||
*/
|
||||
|
||||
import { TaskManagerConfig } from '../config';
|
||||
import { of, Subject } from 'rxjs';
|
||||
import { take, bufferCount } from 'rxjs';
|
||||
import { createMonitoringStatsStream } from './monitoring_stats_stream';
|
||||
|
@ -16,9 +17,51 @@ beforeEach(() => {
|
|||
});
|
||||
|
||||
describe('createMonitoringStatsStream', () => {
|
||||
const configuration: TaskManagerConfig = {
|
||||
max_workers: 10,
|
||||
max_attempts: 9,
|
||||
poll_interval: 6000000,
|
||||
allow_reading_invalid_state: false,
|
||||
version_conflict_threshold: 80,
|
||||
monitored_stats_required_freshness: 6000000,
|
||||
request_capacity: 1000,
|
||||
monitored_aggregated_stats_refresh_rate: 5000,
|
||||
monitored_stats_health_verbose_log: {
|
||||
enabled: false,
|
||||
level: 'debug' as const,
|
||||
warn_delayed_task_start_in_seconds: 60,
|
||||
},
|
||||
monitored_stats_running_average_window: 50,
|
||||
monitored_task_execution_thresholds: {
|
||||
default: {
|
||||
error_threshold: 90,
|
||||
warn_threshold: 80,
|
||||
},
|
||||
custom: {},
|
||||
},
|
||||
ephemeral_tasks: {
|
||||
enabled: true,
|
||||
request_capacity: 10,
|
||||
},
|
||||
unsafe: {
|
||||
exclude_task_types: [],
|
||||
authenticate_background_task_utilization: true,
|
||||
},
|
||||
event_loop_delay: {
|
||||
monitor: true,
|
||||
warn_threshold: 5000,
|
||||
},
|
||||
worker_utilization_running_average_window: 5,
|
||||
metrics_reset_interval: 3000,
|
||||
claim_strategy: 'default',
|
||||
request_timeouts: {
|
||||
update_by_query: 1000,
|
||||
},
|
||||
};
|
||||
|
||||
it('returns the initial config used to configure Task Manager', async () => {
|
||||
return new Promise<void>((resolve) => {
|
||||
createMonitoringStatsStream(of())
|
||||
createMonitoringStatsStream(of(), configuration)
|
||||
.pipe(take(1))
|
||||
.subscribe((firstValue) => {
|
||||
expect(firstValue.stats).toEqual({});
|
||||
|
@ -31,7 +74,7 @@ describe('createMonitoringStatsStream', () => {
|
|||
const aggregatedStats$ = new Subject<AggregatedStat>();
|
||||
|
||||
return new Promise<void>((resolve) => {
|
||||
createMonitoringStatsStream(aggregatedStats$)
|
||||
createMonitoringStatsStream(aggregatedStats$, configuration)
|
||||
.pipe(take(3), bufferCount(3))
|
||||
.subscribe(([initialValue, secondValue, thirdValue]) => {
|
||||
expect(initialValue.stats).toMatchObject({
|
||||
|
@ -39,7 +82,7 @@ describe('createMonitoringStatsStream', () => {
|
|||
stats: {
|
||||
configuration: {
|
||||
value: {
|
||||
capacity: 10,
|
||||
max_workers: 10,
|
||||
poll_interval: 6000000,
|
||||
request_capacity: 1000,
|
||||
monitored_aggregated_stats_refresh_rate: 5000,
|
||||
|
@ -72,7 +115,7 @@ describe('createMonitoringStatsStream', () => {
|
|||
configuration: {
|
||||
timestamp: expect.any(String),
|
||||
value: {
|
||||
capacity: 10,
|
||||
max_workers: 10,
|
||||
poll_interval: 6000000,
|
||||
request_capacity: 1000,
|
||||
monitored_aggregated_stats_refresh_rate: 5000,
|
||||
|
@ -105,7 +148,7 @@ describe('createMonitoringStatsStream', () => {
|
|||
configuration: {
|
||||
timestamp: expect.any(String),
|
||||
value: {
|
||||
capacity: 10,
|
||||
max_workers: 10,
|
||||
poll_interval: 6000000,
|
||||
request_capacity: 1000,
|
||||
monitored_aggregated_stats_refresh_rate: 5000,
|
||||
|
|
|
@ -10,6 +10,8 @@ import { map, scan } from 'rxjs';
|
|||
import { set } from '@kbn/safer-lodash-set';
|
||||
import { Logger } from '@kbn/core/server';
|
||||
import { JsonObject } from '@kbn/utility-types';
|
||||
import { TaskStore } from '../task_store';
|
||||
import { TaskPollingLifecycle } from '../polling_lifecycle';
|
||||
import {
|
||||
createWorkloadAggregator,
|
||||
summarizeWorkloadStat,
|
||||
|
@ -35,9 +37,11 @@ import {
|
|||
|
||||
import { ConfigStat, createConfigurationAggregator } from './configuration_statistics';
|
||||
import { TaskManagerConfig } from '../config';
|
||||
import { ManagedConfiguration } from '../lib/create_managed_configuration';
|
||||
import { EphemeralTaskLifecycle } from '../ephemeral_task_lifecycle';
|
||||
import { CapacityEstimationStat, withCapacityEstimate } from './capacity_estimation';
|
||||
import { AdHocTaskCounter } from '../lib/adhoc_task_counter';
|
||||
import { AggregatedStatProvider } from '../lib/runtime_statistics_aggregator';
|
||||
import { CreateMonitoringStatsOpts } from '.';
|
||||
|
||||
export interface MonitoringStats {
|
||||
last_update: string;
|
||||
|
@ -77,28 +81,26 @@ export interface RawMonitoringStats {
|
|||
};
|
||||
}
|
||||
|
||||
export function createAggregators({
|
||||
taskStore,
|
||||
elasticsearchAndSOAvailability$,
|
||||
config,
|
||||
managedConfig,
|
||||
logger,
|
||||
taskDefinitions,
|
||||
adHocTaskCounter,
|
||||
taskPollingLifecycle,
|
||||
ephemeralTaskLifecycle,
|
||||
}: CreateMonitoringStatsOpts): AggregatedStatProvider {
|
||||
export function createAggregators(
|
||||
taskStore: TaskStore,
|
||||
elasticsearchAndSOAvailability$: Observable<boolean>,
|
||||
config: TaskManagerConfig,
|
||||
managedConfig: ManagedConfiguration,
|
||||
logger: Logger,
|
||||
adHocTaskCounter: AdHocTaskCounter,
|
||||
taskPollingLifecycle?: TaskPollingLifecycle,
|
||||
ephemeralTaskLifecycle?: EphemeralTaskLifecycle
|
||||
): AggregatedStatProvider {
|
||||
const aggregators: AggregatedStatProvider[] = [
|
||||
createConfigurationAggregator(config, managedConfig),
|
||||
|
||||
createWorkloadAggregator({
|
||||
createWorkloadAggregator(
|
||||
taskStore,
|
||||
elasticsearchAndSOAvailability$,
|
||||
refreshInterval: config.monitored_aggregated_stats_refresh_rate,
|
||||
pollInterval: config.poll_interval,
|
||||
logger,
|
||||
taskDefinitions,
|
||||
}),
|
||||
config.monitored_aggregated_stats_refresh_rate,
|
||||
config.poll_interval,
|
||||
logger
|
||||
),
|
||||
];
|
||||
if (taskPollingLifecycle) {
|
||||
aggregators.push(
|
||||
|
@ -116,7 +118,7 @@ export function createAggregators({
|
|||
createEphemeralTaskAggregator(
|
||||
ephemeralTaskLifecycle,
|
||||
config.monitored_stats_running_average_window,
|
||||
managedConfig.startingCapacity
|
||||
config.max_workers
|
||||
)
|
||||
);
|
||||
}
|
||||
|
@ -124,7 +126,8 @@ export function createAggregators({
|
|||
}
|
||||
|
||||
export function createMonitoringStatsStream(
|
||||
provider$: AggregatedStatProvider
|
||||
provider$: AggregatedStatProvider,
|
||||
config: TaskManagerConfig
|
||||
): Observable<MonitoringStats> {
|
||||
const initialStats = {
|
||||
last_update: new Date().toISOString(),
|
||||
|
|
|
@ -12,7 +12,7 @@ import {
|
|||
calculateFrequency,
|
||||
createRunningAveragedStat,
|
||||
createMapOfRunningAveragedStats,
|
||||
} from './task_run_calculators';
|
||||
} from './task_run_calcultors';
|
||||
|
||||
describe('calculateRunningAverage', () => {
|
||||
test('calculates the running average and median of a window of values', async () => {
|
|
@ -35,7 +35,7 @@ import {
|
|||
calculateFrequency,
|
||||
createRunningAveragedStat,
|
||||
createMapOfRunningAveragedStats,
|
||||
} from './task_run_calculators';
|
||||
} from './task_run_calcultors';
|
||||
import { HealthStatus } from './monitoring_stats_stream';
|
||||
import { TaskPollingLifecycle } from '../polling_lifecycle';
|
||||
import { TaskExecutionFailureThreshold, TaskManagerConfig } from '../config';
|
||||
|
|
|
@ -15,14 +15,13 @@ import {
|
|||
padBuckets,
|
||||
estimateRecurringTaskScheduling,
|
||||
} from './workload_statistics';
|
||||
import { ConcreteTaskInstance, TaskCost } from '../task';
|
||||
import { ConcreteTaskInstance } from '../task';
|
||||
|
||||
import { times } from 'lodash';
|
||||
import { taskStoreMock } from '../task_store.mock';
|
||||
import { of, Subject } from 'rxjs';
|
||||
import { sleep } from '../test_utils';
|
||||
import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
|
||||
import { TaskTypeDictionary } from '../task_type_dictionary';
|
||||
|
||||
type ResponseWithAggs = Omit<estypes.SearchResponse<ConcreteTaskInstance>, 'aggregations'> & {
|
||||
aggregations: WorkloadAggregationResponse;
|
||||
|
@ -33,98 +32,52 @@ const asApiResponse = (body: ResponseWithAggs) =>
|
|||
.createSuccessTransportRequestPromise(body as estypes.SearchResponse<ConcreteTaskInstance>)
|
||||
.then((res) => res.body as ResponseWithAggs);
|
||||
|
||||
const logger = loggingSystemMock.create().get();
|
||||
|
||||
const definitions = new TaskTypeDictionary(logger);
|
||||
definitions.registerTaskDefinitions({
|
||||
report: {
|
||||
title: 'report',
|
||||
cost: TaskCost.ExtraLarge,
|
||||
createTaskRunner: jest.fn(),
|
||||
},
|
||||
foo: {
|
||||
title: 'foo',
|
||||
createTaskRunner: jest.fn(),
|
||||
},
|
||||
bar: {
|
||||
title: 'bar',
|
||||
cost: TaskCost.Tiny,
|
||||
createTaskRunner: jest.fn(),
|
||||
},
|
||||
});
|
||||
describe('Workload Statistics Aggregator', () => {
|
||||
beforeEach(() => {
|
||||
jest.resetAllMocks();
|
||||
});
|
||||
|
||||
test('queries the Task Store at a fixed interval for the current workload', async () => {
|
||||
const taskStore = taskStoreMock.create({});
|
||||
taskStore.aggregate.mockResolvedValue(
|
||||
asApiResponse({
|
||||
hits: { hits: [], max_score: 0, total: { value: 3, relation: 'eq' } },
|
||||
hits: {
|
||||
hits: [],
|
||||
max_score: 0,
|
||||
total: { value: 0, relation: 'eq' },
|
||||
},
|
||||
took: 1,
|
||||
timed_out: false,
|
||||
_shards: { total: 1, successful: 1, skipped: 1, failed: 0 },
|
||||
_shards: {
|
||||
total: 1,
|
||||
successful: 1,
|
||||
skipped: 1,
|
||||
failed: 0,
|
||||
},
|
||||
aggregations: {
|
||||
taskType: {
|
||||
buckets: [
|
||||
{
|
||||
key: 'foo',
|
||||
doc_count: 1,
|
||||
status: {
|
||||
doc_count_error_upper_bound: 0,
|
||||
sum_other_doc_count: 0,
|
||||
buckets: [{ key: 'idle', doc_count: 1 }],
|
||||
},
|
||||
},
|
||||
{
|
||||
key: 'bar',
|
||||
doc_count: 1,
|
||||
status: {
|
||||
doc_count_error_upper_bound: 0,
|
||||
sum_other_doc_count: 0,
|
||||
buckets: [{ key: 'claiming', doc_count: 1 }],
|
||||
},
|
||||
},
|
||||
{
|
||||
key: 'report',
|
||||
doc_count: 1,
|
||||
status: {
|
||||
doc_count_error_upper_bound: 0,
|
||||
sum_other_doc_count: 0,
|
||||
buckets: [{ key: 'idle', doc_count: 1 }],
|
||||
},
|
||||
},
|
||||
],
|
||||
buckets: [],
|
||||
doc_count_error_upper_bound: 0,
|
||||
sum_other_doc_count: 0,
|
||||
},
|
||||
schedule: {
|
||||
buckets: [{ key: '1m', doc_count: 8 }],
|
||||
buckets: [],
|
||||
doc_count_error_upper_bound: 0,
|
||||
sum_other_doc_count: 0,
|
||||
},
|
||||
nonRecurringTasks: {
|
||||
doc_count: 1,
|
||||
taskType: {
|
||||
buckets: [{ key: 'report', doc_count: 1 }],
|
||||
doc_count_error_upper_bound: 0,
|
||||
sum_other_doc_count: 0,
|
||||
doc_count: 13,
|
||||
},
|
||||
ownerIds: {
|
||||
ownerIds: {
|
||||
value: 1,
|
||||
},
|
||||
},
|
||||
ownerIds: { ownerIds: { value: 1 } },
|
||||
// The `FiltersAggregate` doesn't cover the case of a nested `AggregationsAggregationContainer`, in which `FiltersAggregate`
|
||||
// would not have a `buckets` property, but rather a keyed property that's inferred from the request.
|
||||
// @ts-expect-error
|
||||
idleTasks: {
|
||||
doc_count: 0,
|
||||
overdue: {
|
||||
doc_count: 1,
|
||||
nonRecurring: { doc_count: 0 },
|
||||
taskTypes: {
|
||||
buckets: [{ key: 'foo', doc_count: 1 }],
|
||||
doc_count_error_upper_bound: 0,
|
||||
sum_other_doc_count: 0,
|
||||
doc_count: 0,
|
||||
nonRecurring: {
|
||||
doc_count: 0,
|
||||
},
|
||||
},
|
||||
scheduleDensity: {
|
||||
|
@ -136,7 +89,9 @@ describe('Workload Statistics Aggregator', () => {
|
|||
to: 1.601651976274e12,
|
||||
to_as_string: '2020-10-02T15:19:36.274Z',
|
||||
doc_count: 0,
|
||||
histogram: { buckets: [] },
|
||||
histogram: {
|
||||
buckets: [],
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
|
@ -145,51 +100,87 @@ describe('Workload Statistics Aggregator', () => {
|
|||
})
|
||||
);
|
||||
|
||||
const workloadAggregator = createWorkloadAggregator({
|
||||
const workloadAggregator = createWorkloadAggregator(
|
||||
taskStore,
|
||||
elasticsearchAndSOAvailability$: of(true),
|
||||
refreshInterval: 10,
|
||||
pollInterval: 3000,
|
||||
logger,
|
||||
taskDefinitions: definitions,
|
||||
});
|
||||
of(true),
|
||||
10,
|
||||
3000,
|
||||
loggingSystemMock.create().get()
|
||||
);
|
||||
|
||||
return new Promise<void>((resolve) => {
|
||||
workloadAggregator.pipe(first()).subscribe(() => {
|
||||
expect(taskStore.aggregate).toHaveBeenCalledWith({
|
||||
aggs: {
|
||||
taskType: {
|
||||
terms: { size: 3, field: 'task.taskType' },
|
||||
aggs: { status: { terms: { field: 'task.status' } } },
|
||||
terms: { size: 100, field: 'task.taskType' },
|
||||
aggs: {
|
||||
status: {
|
||||
terms: { field: 'task.status' },
|
||||
},
|
||||
},
|
||||
},
|
||||
schedule: {
|
||||
terms: { field: 'task.schedule.interval', size: 100 },
|
||||
terms: {
|
||||
field: 'task.schedule.interval',
|
||||
size: 100,
|
||||
},
|
||||
},
|
||||
nonRecurringTasks: {
|
||||
missing: { field: 'task.schedule.interval' },
|
||||
aggs: { taskType: { terms: { size: 3, field: 'task.taskType' } } },
|
||||
missing: { field: 'task.schedule' },
|
||||
},
|
||||
ownerIds: {
|
||||
filter: { range: { 'task.startedAt': { gte: 'now-1w/w' } } },
|
||||
aggs: { ownerIds: { cardinality: { field: 'task.ownerId' } } },
|
||||
filter: {
|
||||
range: {
|
||||
'task.startedAt': {
|
||||
gte: 'now-1w/w',
|
||||
},
|
||||
},
|
||||
},
|
||||
aggs: {
|
||||
ownerIds: {
|
||||
cardinality: {
|
||||
field: 'task.ownerId',
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
idleTasks: {
|
||||
filter: { term: { 'task.status': 'idle' } },
|
||||
filter: {
|
||||
term: { 'task.status': 'idle' },
|
||||
},
|
||||
aggs: {
|
||||
scheduleDensity: {
|
||||
range: { field: 'task.runAt', ranges: [{ from: 'now', to: 'now+1m' }] },
|
||||
range: {
|
||||
field: 'task.runAt',
|
||||
ranges: [{ from: 'now', to: 'now+1m' }],
|
||||
},
|
||||
aggs: {
|
||||
histogram: {
|
||||
date_histogram: { field: 'task.runAt', fixed_interval: '3s' },
|
||||
aggs: { interval: { terms: { field: 'task.schedule.interval' } } },
|
||||
date_histogram: {
|
||||
field: 'task.runAt',
|
||||
fixed_interval: '3s',
|
||||
},
|
||||
aggs: {
|
||||
interval: {
|
||||
terms: {
|
||||
field: 'task.schedule.interval',
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
overdue: {
|
||||
filter: { range: { 'task.runAt': { lt: 'now' } } },
|
||||
filter: {
|
||||
range: {
|
||||
'task.runAt': { lt: 'now' },
|
||||
},
|
||||
},
|
||||
aggs: {
|
||||
nonRecurring: { missing: { field: 'task.schedule.interval' } },
|
||||
taskTypes: { terms: { size: 3, field: 'task.taskType' } },
|
||||
nonRecurring: {
|
||||
missing: { field: 'task.schedule' },
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
@ -203,18 +194,36 @@ describe('Workload Statistics Aggregator', () => {
|
|||
|
||||
const mockAggregatedResult = () =>
|
||||
asApiResponse({
|
||||
hits: { hits: [], max_score: 0, total: { value: 4, relation: 'eq' } },
|
||||
hits: {
|
||||
hits: [],
|
||||
max_score: 0,
|
||||
total: { value: 4, relation: 'eq' },
|
||||
},
|
||||
took: 1,
|
||||
timed_out: false,
|
||||
_shards: { total: 1, successful: 1, skipped: 1, failed: 0 },
|
||||
_shards: {
|
||||
total: 1,
|
||||
successful: 1,
|
||||
skipped: 1,
|
||||
failed: 0,
|
||||
},
|
||||
aggregations: {
|
||||
schedule: {
|
||||
doc_count_error_upper_bound: 0,
|
||||
sum_other_doc_count: 0,
|
||||
buckets: [
|
||||
{ key: '3600s', doc_count: 1 },
|
||||
{ key: '60s', doc_count: 1 },
|
||||
{ key: '720m', doc_count: 1 },
|
||||
{
|
||||
key: '3600s',
|
||||
doc_count: 1,
|
||||
},
|
||||
{
|
||||
key: '60s',
|
||||
doc_count: 1,
|
||||
},
|
||||
{
|
||||
key: '720m',
|
||||
doc_count: 1,
|
||||
},
|
||||
],
|
||||
},
|
||||
taskType: {
|
||||
|
@ -222,55 +231,66 @@ describe('Workload Statistics Aggregator', () => {
|
|||
sum_other_doc_count: 0,
|
||||
buckets: [
|
||||
{
|
||||
key: 'foo',
|
||||
key: 'actions_telemetry',
|
||||
doc_count: 2,
|
||||
status: {
|
||||
doc_count_error_upper_bound: 0,
|
||||
sum_other_doc_count: 0,
|
||||
buckets: [{ key: 'idle', doc_count: 2 }],
|
||||
buckets: [
|
||||
{
|
||||
key: 'idle',
|
||||
doc_count: 2,
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
{
|
||||
key: 'bar',
|
||||
key: 'alerting_telemetry',
|
||||
doc_count: 1,
|
||||
status: {
|
||||
doc_count_error_upper_bound: 0,
|
||||
sum_other_doc_count: 0,
|
||||
buckets: [{ key: 'idle', doc_count: 1 }],
|
||||
buckets: [
|
||||
{
|
||||
key: 'idle',
|
||||
doc_count: 1,
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
{
|
||||
key: 'report',
|
||||
key: 'session_cleanup',
|
||||
doc_count: 1,
|
||||
status: {
|
||||
doc_count_error_upper_bound: 0,
|
||||
sum_other_doc_count: 0,
|
||||
buckets: [{ key: 'idle', doc_count: 1 }],
|
||||
buckets: [
|
||||
{
|
||||
key: 'idle',
|
||||
doc_count: 1,
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
nonRecurringTasks: {
|
||||
doc_count: 1,
|
||||
taskType: {
|
||||
buckets: [{ key: 'report', doc_count: 1 }],
|
||||
doc_count_error_upper_bound: 0,
|
||||
sum_other_doc_count: 0,
|
||||
doc_count: 13,
|
||||
},
|
||||
ownerIds: {
|
||||
ownerIds: {
|
||||
value: 1,
|
||||
},
|
||||
},
|
||||
ownerIds: { ownerIds: { value: 1 } },
|
||||
// The `FiltersAggregate` doesn't cover the case of a nested `AggregationsAggregationContainer`, in which `FiltersAggregate`
|
||||
// would not have a `buckets` property, but rather a keyed property that's inferred from the request.
|
||||
// @ts-expect-error
|
||||
idleTasks: {
|
||||
doc_count: 3,
|
||||
doc_count: 13,
|
||||
overdue: {
|
||||
doc_count: 2,
|
||||
nonRecurring: { doc_count: 1 },
|
||||
taskTypes: {
|
||||
buckets: [{ key: 'foo', doc_count: 1 }],
|
||||
doc_count_error_upper_bound: 0,
|
||||
sum_other_doc_count: 0,
|
||||
doc_count: 6,
|
||||
nonRecurring: {
|
||||
doc_count: 6,
|
||||
},
|
||||
},
|
||||
scheduleDensity: {
|
||||
|
@ -286,25 +306,23 @@ describe('Workload Statistics Aggregator', () => {
|
|||
const taskStore = taskStoreMock.create({});
|
||||
taskStore.aggregate.mockResolvedValue(mockAggregatedResult());
|
||||
|
||||
const workloadAggregator = createWorkloadAggregator({
|
||||
const workloadAggregator = createWorkloadAggregator(
|
||||
taskStore,
|
||||
elasticsearchAndSOAvailability$: of(true),
|
||||
refreshInterval: 10,
|
||||
pollInterval: 3000,
|
||||
logger,
|
||||
taskDefinitions: definitions,
|
||||
});
|
||||
of(true),
|
||||
10,
|
||||
3000,
|
||||
loggingSystemMock.create().get()
|
||||
);
|
||||
|
||||
return new Promise<void>((resolve) => {
|
||||
workloadAggregator.pipe(first()).subscribe((result) => {
|
||||
expect(result.key).toEqual('workload');
|
||||
expect(result.value).toMatchObject({
|
||||
count: 4,
|
||||
cost: 15,
|
||||
task_types: {
|
||||
foo: { count: 2, cost: 4, status: { idle: 2 } },
|
||||
bar: { count: 1, cost: 1, status: { idle: 1 } },
|
||||
report: { count: 1, cost: 10, status: { idle: 1 } },
|
||||
actions_telemetry: { count: 2, status: { idle: 2 } },
|
||||
alerting_telemetry: { count: 1, status: { idle: 1 } },
|
||||
session_cleanup: { count: 1, status: { idle: 1 } },
|
||||
},
|
||||
});
|
||||
resolve();
|
||||
|
@ -318,14 +336,13 @@ describe('Workload Statistics Aggregator', () => {
|
|||
|
||||
const availability$ = new Subject<boolean>();
|
||||
|
||||
const workloadAggregator = createWorkloadAggregator({
|
||||
const workloadAggregator = createWorkloadAggregator(
|
||||
taskStore,
|
||||
elasticsearchAndSOAvailability$: of(true),
|
||||
refreshInterval: 10,
|
||||
pollInterval: 3000,
|
||||
logger,
|
||||
taskDefinitions: definitions,
|
||||
});
|
||||
availability$,
|
||||
10,
|
||||
3000,
|
||||
loggingSystemMock.create().get()
|
||||
);
|
||||
|
||||
return new Promise<void>(async (resolve, reject) => {
|
||||
try {
|
||||
|
@ -333,11 +350,25 @@ describe('Workload Statistics Aggregator', () => {
|
|||
expect(result.key).toEqual('workload');
|
||||
expect(result.value).toMatchObject({
|
||||
count: 4,
|
||||
cost: 15,
|
||||
task_types: {
|
||||
foo: { count: 2, cost: 4, status: { idle: 2 } },
|
||||
bar: { count: 1, cost: 1, status: { idle: 1 } },
|
||||
report: { count: 1, cost: 10, status: { idle: 1 } },
|
||||
actions_telemetry: {
|
||||
count: 2,
|
||||
status: {
|
||||
idle: 2,
|
||||
},
|
||||
},
|
||||
alerting_telemetry: {
|
||||
count: 1,
|
||||
status: {
|
||||
idle: 1,
|
||||
},
|
||||
},
|
||||
session_cleanup: {
|
||||
count: 1,
|
||||
status: {
|
||||
idle: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
resolve();
|
||||
|
@ -358,22 +389,19 @@ describe('Workload Statistics Aggregator', () => {
|
|||
const taskStore = taskStoreMock.create({});
|
||||
taskStore.aggregate.mockResolvedValue(mockAggregatedResult());
|
||||
|
||||
const workloadAggregator = createWorkloadAggregator({
|
||||
const workloadAggregator = createWorkloadAggregator(
|
||||
taskStore,
|
||||
elasticsearchAndSOAvailability$: of(true),
|
||||
refreshInterval: 10,
|
||||
pollInterval: 3000,
|
||||
logger,
|
||||
taskDefinitions: definitions,
|
||||
});
|
||||
of(true),
|
||||
10,
|
||||
3000,
|
||||
loggingSystemMock.create().get()
|
||||
);
|
||||
|
||||
return new Promise<void>((resolve) => {
|
||||
workloadAggregator.pipe(first()).subscribe((result) => {
|
||||
expect(result.key).toEqual('workload');
|
||||
expect(result.value).toMatchObject({
|
||||
overdue: 2,
|
||||
overdue_cost: 2,
|
||||
overdue_non_recurring: 1,
|
||||
overdue: 6,
|
||||
});
|
||||
resolve();
|
||||
});
|
||||
|
@ -384,14 +412,13 @@ describe('Workload Statistics Aggregator', () => {
|
|||
const taskStore = taskStoreMock.create({});
|
||||
taskStore.aggregate.mockResolvedValue(mockAggregatedResult());
|
||||
|
||||
const workloadAggregator = createWorkloadAggregator({
|
||||
const workloadAggregator = createWorkloadAggregator(
|
||||
taskStore,
|
||||
elasticsearchAndSOAvailability$: of(true),
|
||||
refreshInterval: 10,
|
||||
pollInterval: 3000,
|
||||
logger,
|
||||
taskDefinitions: definitions,
|
||||
});
|
||||
of(true),
|
||||
10,
|
||||
3000,
|
||||
loggingSystemMock.create().get()
|
||||
);
|
||||
|
||||
return new Promise<void>((resolve) => {
|
||||
workloadAggregator.pipe(first()).subscribe((result) => {
|
||||
|
@ -413,14 +440,13 @@ describe('Workload Statistics Aggregator', () => {
|
|||
const taskStore = taskStoreMock.create({});
|
||||
taskStore.aggregate.mockResolvedValue(mockAggregatedResult());
|
||||
|
||||
const workloadAggregator = createWorkloadAggregator({
|
||||
const workloadAggregator = createWorkloadAggregator(
|
||||
taskStore,
|
||||
elasticsearchAndSOAvailability$: of(true),
|
||||
refreshInterval: 60 * 1000,
|
||||
pollInterval: 3000,
|
||||
logger,
|
||||
taskDefinitions: definitions,
|
||||
});
|
||||
of(true),
|
||||
60 * 1000,
|
||||
3000,
|
||||
loggingSystemMock.create().get()
|
||||
);
|
||||
|
||||
return new Promise<void>((resolve) => {
|
||||
workloadAggregator.pipe(first()).subscribe(() => {
|
||||
|
@ -452,14 +478,13 @@ describe('Workload Statistics Aggregator', () => {
|
|||
const taskStore = taskStoreMock.create({});
|
||||
taskStore.aggregate.mockResolvedValue(mockAggregatedResult());
|
||||
|
||||
const workloadAggregator = createWorkloadAggregator({
|
||||
const workloadAggregator = createWorkloadAggregator(
|
||||
taskStore,
|
||||
elasticsearchAndSOAvailability$: of(true),
|
||||
refreshInterval: 15 * 60 * 1000,
|
||||
pollInterval: 3000,
|
||||
logger,
|
||||
taskDefinitions: definitions,
|
||||
});
|
||||
of(true),
|
||||
15 * 60 * 1000,
|
||||
3000,
|
||||
loggingSystemMock.create().get()
|
||||
);
|
||||
|
||||
return new Promise<void>((resolve) => {
|
||||
workloadAggregator.pipe(first()).subscribe((result) => {
|
||||
|
@ -492,41 +517,42 @@ describe('Workload Statistics Aggregator', () => {
|
|||
const taskStore = taskStoreMock.create({});
|
||||
taskStore.aggregate
|
||||
.mockResolvedValueOnce(
|
||||
mockAggregatedResult().then((res) => setTaskTypeCount(res, 'foo', { idle: 2 }))
|
||||
mockAggregatedResult().then((res) =>
|
||||
setTaskTypeCount(res, 'alerting_telemetry', {
|
||||
idle: 2,
|
||||
})
|
||||
)
|
||||
)
|
||||
.mockRejectedValueOnce(new Error('Elasticsearch has gone poof'))
|
||||
.mockResolvedValueOnce(
|
||||
mockAggregatedResult().then((res) => setTaskTypeCount(res, 'foo', { idle: 1, failed: 1 }))
|
||||
mockAggregatedResult().then((res) =>
|
||||
setTaskTypeCount(res, 'alerting_telemetry', {
|
||||
idle: 1,
|
||||
failed: 1,
|
||||
})
|
||||
)
|
||||
);
|
||||
const workloadAggregator = createWorkloadAggregator({
|
||||
taskStore,
|
||||
elasticsearchAndSOAvailability$: of(true),
|
||||
refreshInterval: 10,
|
||||
pollInterval: 3000,
|
||||
logger,
|
||||
taskDefinitions: definitions,
|
||||
});
|
||||
const logger = loggingSystemMock.create().get();
|
||||
const workloadAggregator = createWorkloadAggregator(taskStore, of(true), 10, 3000, logger);
|
||||
|
||||
return new Promise<void>((resolve, reject) => {
|
||||
workloadAggregator.pipe(take(2), bufferCount(2)).subscribe((results) => {
|
||||
expect(results[0].key).toEqual('workload');
|
||||
expect(results[0].value).toMatchObject({
|
||||
count: 4,
|
||||
cost: 15,
|
||||
count: 5,
|
||||
task_types: {
|
||||
bar: { count: 1, cost: 1, status: { idle: 1 } },
|
||||
report: { count: 1, cost: 10, status: { idle: 1 } },
|
||||
foo: { count: 2, cost: 4, status: { idle: 2 } },
|
||||
actions_telemetry: { count: 2, status: { idle: 2 } },
|
||||
alerting_telemetry: { count: 2, status: { idle: 2 } },
|
||||
session_cleanup: { count: 1, status: { idle: 1 } },
|
||||
},
|
||||
});
|
||||
expect(results[1].key).toEqual('workload');
|
||||
expect(results[1].value).toMatchObject({
|
||||
count: 4,
|
||||
cost: 15,
|
||||
count: 5,
|
||||
task_types: {
|
||||
bar: { count: 1, cost: 1, status: { idle: 1 } },
|
||||
report: { count: 1, cost: 10, status: { idle: 1 } },
|
||||
foo: { count: 2, cost: 4, status: { idle: 1, failed: 1 } },
|
||||
actions_telemetry: { count: 2, status: { idle: 2 } },
|
||||
alerting_telemetry: { count: 2, status: { idle: 1, failed: 1 } },
|
||||
session_cleanup: { count: 1, status: { idle: 1 } },
|
||||
},
|
||||
});
|
||||
resolve();
|
||||
|
@@ -541,27 +567,49 @@ describe('Workload Statistics Aggregator', () => {
|
|||
const taskStore = taskStoreMock.create({});
|
||||
taskStore.aggregate.mockResolvedValue(
|
||||
asApiResponse({
|
||||
hits: { hits: [], max_score: 0, total: { value: 4, relation: 'eq' } },
|
||||
hits: {
|
||||
hits: [],
|
||||
max_score: 0,
|
||||
total: { value: 4, relation: 'eq' },
|
||||
},
|
||||
took: 1,
|
||||
timed_out: false,
|
||||
_shards: { total: 1, successful: 1, skipped: 1, failed: 0 },
|
||||
_shards: {
|
||||
total: 1,
|
||||
successful: 1,
|
||||
skipped: 1,
|
||||
failed: 0,
|
||||
},
|
||||
aggregations: {
|
||||
schedule: {
|
||||
doc_count_error_upper_bound: 0,
|
||||
sum_other_doc_count: 0,
|
||||
buckets: [
|
||||
// repeats each cycle
|
||||
{ key: `${pollingIntervalInSeconds}s`, doc_count: 1 },
|
||||
// 6 times per minute
|
||||
{ key: `10s`, doc_count: 20 },
|
||||
// 1 times per minute
|
||||
{ key: `60s`, doc_count: 10 },
|
||||
// 4 times per hour
|
||||
{ key: '15m', doc_count: 90 },
|
||||
// 2 times per day
|
||||
{ key: '720m', doc_count: 10 },
|
||||
// 8 times per day
|
||||
{ key: '3h', doc_count: 100 },
|
||||
{
|
||||
key: `${pollingIntervalInSeconds}s`,
|
||||
doc_count: 1,
|
||||
},
|
||||
{
|
||||
key: `10s`, // 6 times per minute
|
||||
doc_count: 20,
|
||||
},
|
||||
{
|
||||
key: `60s`, // 1 times per minute
|
||||
doc_count: 10,
|
||||
},
|
||||
{
|
||||
key: '15m', // 4 times per hour
|
||||
doc_count: 90,
|
||||
},
|
||||
{
|
||||
key: '720m', // 2 times per day
|
||||
doc_count: 10,
|
||||
},
|
||||
{
|
||||
key: '3h', // 8 times per day
|
||||
doc_count: 100,
|
||||
},
|
||||
],
|
||||
},
|
||||
taskType: {
|
||||
|
@@ -571,13 +619,12 @@ describe('Workload Statistics Aggregator', () => {
|
|||
},
|
||||
nonRecurringTasks: {
|
||||
doc_count: 13,
|
||||
taskType: {
|
||||
buckets: [{ key: 'report', doc_count: 13 }],
|
||||
doc_count_error_upper_bound: 0,
|
||||
sum_other_doc_count: 0,
|
||||
},
|
||||
ownerIds: {
|
||||
ownerIds: {
|
||||
value: 3,
|
||||
},
|
||||
},
|
||||
ownerIds: { ownerIds: { value: 3 } },
|
||||
// The `FiltersAggregate` doesn't cover the case of a nested `AggregationContainer`, in which `FiltersAggregate`
|
||||
// would not have a `buckets` property, but rather a keyed property that's inferred from the request.
|
||||
// @ts-expect-error
|
||||
|
@ -585,11 +632,8 @@ describe('Workload Statistics Aggregator', () => {
|
|||
doc_count: 13,
|
||||
overdue: {
|
||||
doc_count: 6,
|
||||
nonRecurring: { doc_count: 0 },
|
||||
taskTypes: {
|
||||
buckets: [{ key: 'foo', doc_count: 6 }],
|
||||
doc_count_error_upper_bound: 0,
|
||||
sum_other_doc_count: 0,
|
||||
nonRecurring: {
|
||||
doc_count: 0,
|
||||
},
|
||||
},
|
||||
scheduleDensity: {
|
||||
|
@ -602,14 +646,13 @@ describe('Workload Statistics Aggregator', () => {
|
|||
})
|
||||
);
|
||||
|
||||
const workloadAggregator = createWorkloadAggregator({
|
||||
const workloadAggregator = createWorkloadAggregator(
|
||||
taskStore,
|
||||
elasticsearchAndSOAvailability$: of(true),
|
||||
refreshInterval: 10,
|
||||
pollInterval: pollingIntervalInSeconds * 1000,
|
||||
logger,
|
||||
taskDefinitions: definitions,
|
||||
});
|
||||
of(true),
|
||||
10,
|
||||
pollingIntervalInSeconds * 1000,
|
||||
loggingSystemMock.create().get()
|
||||
);
|
||||
|
||||
return new Promise<void>((resolve) => {
|
||||
workloadAggregator.pipe(first()).subscribe((result) => {
|
||||
|
@ -617,7 +660,7 @@ describe('Workload Statistics Aggregator', () => {
|
|||
|
||||
expect(result.value).toMatchObject({
|
||||
capacity_requirements: {
|
||||
// these are buckets of required capacity, rather than aggregated requirements.
|
||||
// these are buckets of required capacity, rather than aggregated requirmenets.
|
||||
per_minute: 150,
|
||||
per_hour: 360,
|
||||
per_day: 820,
|
||||
|
@ -632,14 +675,14 @@ describe('Workload Statistics Aggregator', () => {
|
|||
const refreshInterval = 1000;
|
||||
|
||||
const taskStore = taskStoreMock.create({});
|
||||
const workloadAggregator = createWorkloadAggregator({
|
||||
const logger = loggingSystemMock.create().get();
|
||||
const workloadAggregator = createWorkloadAggregator(
|
||||
taskStore,
|
||||
elasticsearchAndSOAvailability$: of(true),
|
||||
of(true),
|
||||
refreshInterval,
|
||||
pollInterval: 3000,
|
||||
logger,
|
||||
taskDefinitions: definitions,
|
||||
});
|
||||
3000,
|
||||
logger
|
||||
);
|
||||
|
||||
return new Promise<void>((resolve, reject) => {
|
||||
let errorWasThrowAt = 0;
|
||||
|
@ -651,7 +694,9 @@ describe('Workload Statistics Aggregator', () => {
|
|||
reject(new Error(`Elasticsearch is still poof`));
|
||||
}
|
||||
|
||||
return setTaskTypeCount(await mockAggregatedResult(), 'foo', { idle: 2 });
|
||||
return setTaskTypeCount(await mockAggregatedResult(), 'alerting_telemetry', {
|
||||
idle: 2,
|
||||
});
|
||||
});
|
||||
|
||||
workloadAggregator.pipe(take(2), bufferCount(2)).subscribe((results) => {
|
||||
|
@@ -754,7 +799,7 @@ describe('estimateRecurringTaskScheduling', () => {
|
|||
});
|
||||
|
||||
describe('padBuckets', () => {
|
||||
test('returns zeroed out buckets when there are no buckets in the histogram', async () => {
|
||||
test('returns zeroed out bucklets when there are no buckets in the histogram', async () => {
|
||||
expect(
|
||||
padBuckets(10, 3000, {
|
||||
key: '2020-10-02T19:47:28.128Z-2020-10-02T19:48:28.128Z',
|
||||
|
|
|
@@ -16,9 +16,7 @@ import { AggregatedStatProvider } from '../lib/runtime_statistics_aggregator';
|
|||
import { parseIntervalAsSecond, asInterval, parseIntervalAsMillisecond } from '../lib/intervals';
|
||||
import { HealthStatus } from './monitoring_stats_stream';
|
||||
import { TaskStore } from '../task_store';
|
||||
import { createRunningAveragedStat } from './task_run_calculators';
|
||||
import { TaskTypeDictionary } from '../task_type_dictionary';
|
||||
import { TaskCost } from '../task';
|
||||
import { createRunningAveragedStat } from './task_run_calcultors';
|
||||
|
||||
interface StatusStat extends JsonObject {
|
||||
[status: string]: number;
|
||||
|
@@ -26,20 +24,16 @@ interface StatusStat extends JsonObject {
|
|||
interface TaskTypeStat extends JsonObject {
|
||||
[taskType: string]: {
|
||||
count: number;
|
||||
cost: number;
|
||||
status: StatusStat;
|
||||
};
|
||||
}
|
||||
|
||||
interface RawWorkloadStat extends JsonObject {
|
||||
count: number;
|
||||
cost: number;
|
||||
task_types: TaskTypeStat;
|
||||
schedule: Array<[string, number]>;
|
||||
non_recurring: number;
|
||||
non_recurring_cost: number;
|
||||
overdue: number;
|
||||
overdue_cost: number;
|
||||
overdue_non_recurring: number;
|
||||
estimated_schedule_density: number[];
|
||||
capacity_requirements: CapacityRequirements;
|
||||
|
@@ -115,34 +109,22 @@ type ScheduleDensityResult = AggregationResultOf<
|
|||
type ScheduledIntervals = ScheduleDensityResult['histogram']['buckets'][0];
|
||||
|
||||
// Set an upper bound just in case a customer sets a really high refresh rate
|
||||
const MAX_SCHEDULE_DENSITY_BUCKETS = 50;
|
||||
const MAX_SHCEDULE_DENSITY_BUCKETS = 50;
|
||||
|
||||
interface CreateWorkloadAggregatorOpts {
|
||||
taskStore: TaskStore;
|
||||
elasticsearchAndSOAvailability$: Observable<boolean>;
|
||||
refreshInterval: number;
|
||||
pollInterval: number;
|
||||
logger: Logger;
|
||||
taskDefinitions: TaskTypeDictionary;
|
||||
}
|
||||
|
||||
export function createWorkloadAggregator({
|
||||
taskStore,
|
||||
elasticsearchAndSOAvailability$,
|
||||
refreshInterval,
|
||||
pollInterval,
|
||||
logger,
|
||||
taskDefinitions,
|
||||
}: CreateWorkloadAggregatorOpts): AggregatedStatProvider<WorkloadStat> {
|
||||
export function createWorkloadAggregator(
|
||||
taskStore: TaskStore,
|
||||
elasticsearchAndSOAvailability$: Observable<boolean>,
|
||||
refreshInterval: number,
|
||||
pollInterval: number,
|
||||
logger: Logger
|
||||
): AggregatedStatProvider<WorkloadStat> {
|
||||
// calculate scheduleDensity going two refreshIntervals or 1 minute into into the future
|
||||
// (the longer of the two)
|
||||
const scheduleDensityBuckets = Math.min(
|
||||
Math.max(Math.round(60000 / pollInterval), Math.round((refreshInterval * 2) / pollInterval)),
|
||||
MAX_SCHEDULE_DENSITY_BUCKETS
|
||||
MAX_SHCEDULE_DENSITY_BUCKETS
|
||||
);
|
||||
|
||||
const totalNumTaskDefinitions = taskDefinitions.getAllTypes().length;
|
||||
const taskTypeTermAggSize = Math.min(totalNumTaskDefinitions, 10000);
|
||||
const ownerIdsQueue = createRunningAveragedStat<number>(scheduleDensityBuckets);
|
||||
|
||||
return combineLatest([timer(0, refreshInterval), elasticsearchAndSOAvailability$]).pipe(
|
||||
|
@@ -151,24 +133,39 @@ export function createWorkloadAggregator({
|
|||
taskStore.aggregate({
|
||||
aggs: {
|
||||
taskType: {
|
||||
terms: { size: taskTypeTermAggSize, field: 'task.taskType' },
|
||||
aggs: { status: { terms: { field: 'task.status' } } },
|
||||
terms: { size: 100, field: 'task.taskType' },
|
||||
aggs: {
|
||||
status: {
|
||||
terms: { field: 'task.status' },
|
||||
},
|
||||
},
|
||||
},
|
||||
schedule: {
|
||||
terms: { field: 'task.schedule.interval', size: 100 },
|
||||
},
|
||||
nonRecurringTasks: {
|
||||
missing: { field: 'task.schedule.interval' },
|
||||
aggs: {
|
||||
taskType: { terms: { size: taskTypeTermAggSize, field: 'task.taskType' } },
|
||||
},
|
||||
missing: { field: 'task.schedule' },
|
||||
},
|
||||
ownerIds: {
|
||||
filter: { range: { 'task.startedAt': { gte: 'now-1w/w' } } },
|
||||
aggs: { ownerIds: { cardinality: { field: 'task.ownerId' } } },
|
||||
filter: {
|
||||
range: {
|
||||
'task.startedAt': {
|
||||
gte: 'now-1w/w',
|
||||
},
|
||||
},
|
||||
},
|
||||
aggs: {
|
||||
ownerIds: {
|
||||
cardinality: {
|
||||
field: 'task.ownerId',
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
idleTasks: {
|
||||
filter: { term: { 'task.status': 'idle' } },
|
||||
filter: {
|
||||
term: { 'task.status': 'idle' },
|
||||
},
|
||||
aggs: {
|
||||
scheduleDensity: {
|
||||
// create a window of upcoming tasks
|
||||
|
@ -190,7 +187,7 @@ export function createWorkloadAggregator({
|
|||
field: 'task.runAt',
|
||||
fixed_interval: asInterval(pollInterval),
|
||||
},
|
||||
// break down each bucket in the histogram by schedule
|
||||
// break down each bucket in the historgram by schedule
|
||||
aggs: {
|
||||
interval: {
|
||||
terms: { field: 'task.schedule.interval' },
|
||||
|
@ -200,10 +197,15 @@ export function createWorkloadAggregator({
|
|||
},
|
||||
},
|
||||
overdue: {
|
||||
filter: { range: { 'task.runAt': { lt: 'now' } } },
|
||||
filter: {
|
||||
range: {
|
||||
'task.runAt': { lt: 'now' },
|
||||
},
|
||||
},
|
||||
aggs: {
|
||||
taskTypes: { terms: { size: taskTypeTermAggSize, field: 'task.taskType' } },
|
||||
nonRecurring: { missing: { field: 'task.schedule.interval' } },
|
||||
nonRecurring: {
|
||||
missing: { field: 'task.schedule' },
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
@ -224,13 +226,11 @@ export function createWorkloadAggregator({
|
|||
|
||||
const taskTypes = aggregations.taskType.buckets;
|
||||
const nonRecurring = aggregations.nonRecurringTasks.doc_count;
|
||||
const nonRecurringTaskTypes = aggregations.nonRecurringTasks.taskType.buckets;
|
||||
const ownerIds = aggregations.ownerIds.ownerIds.value;
|
||||
|
||||
const {
|
||||
overdue: {
|
||||
doc_count: overdue,
|
||||
taskTypes: { buckets: taskTypesOverdue = [] } = {},
|
||||
nonRecurring: { doc_count: overdueNonRecurring },
|
||||
},
|
||||
scheduleDensity: { buckets: [scheduleDensity] = [] } = {},
|
||||
|
@ -243,7 +243,6 @@ export function createWorkloadAggregator({
|
|||
asSeconds: parseIntervalAsSecond(schedule.key as string),
|
||||
count: schedule.doc_count,
|
||||
};
|
||||
|
||||
accm.schedules.push(parsedSchedule);
|
||||
if (parsedSchedule.asSeconds <= 60) {
|
||||
accm.cadence.perMinute +=
|
||||
|
@ -258,7 +257,11 @@ export function createWorkloadAggregator({
|
|||
return accm;
|
||||
},
|
||||
{
|
||||
cadence: { perMinute: 0, perHour: 0, perDay: 0 },
|
||||
cadence: {
|
||||
perMinute: 0,
|
||||
perHour: 0,
|
||||
perDay: 0,
|
||||
},
|
||||
schedules: [] as Array<{
|
||||
interval: string;
|
||||
asSeconds: number;
|
||||
|
@ -267,36 +270,20 @@ export function createWorkloadAggregator({
|
|||
}
|
||||
);
|
||||
|
||||
const totalNonRecurringCost = getTotalCost(nonRecurringTaskTypes, taskDefinitions);
|
||||
const totalOverdueCost = getTotalCost(taskTypesOverdue, taskDefinitions);
|
||||
|
||||
let totalCost = 0;
|
||||
const taskTypeSummary = taskTypes.reduce((acc, bucket) => {
|
||||
const value = bucket as TaskTypeWithStatusBucket;
|
||||
const cost =
|
||||
value.doc_count * taskDefinitions.get(value.key as string)?.cost ?? TaskCost.Normal;
|
||||
totalCost += cost;
|
||||
return Object.assign(acc, {
|
||||
[value.key as string]: {
|
||||
count: value.doc_count,
|
||||
cost,
|
||||
status: mapValues(keyBy(value.status.buckets, 'key'), 'doc_count'),
|
||||
},
|
||||
});
|
||||
}, {});
|
||||
|
||||
const summary: WorkloadStat = {
|
||||
count,
|
||||
cost: totalCost,
|
||||
task_types: taskTypeSummary,
|
||||
task_types: mapValues(keyBy(taskTypes, 'key'), ({ doc_count: docCount, status }) => {
|
||||
return {
|
||||
count: docCount,
|
||||
status: mapValues(keyBy(status.buckets, 'key'), 'doc_count'),
|
||||
};
|
||||
}),
|
||||
non_recurring: nonRecurring,
|
||||
non_recurring_cost: totalNonRecurringCost,
|
||||
owner_ids: ownerIdsQueue(ownerIds),
|
||||
schedule: schedules
|
||||
.sort((scheduleLeft, scheduleRight) => scheduleLeft.asSeconds - scheduleRight.asSeconds)
|
||||
.map((schedule) => [schedule.interval, schedule.count]),
|
||||
overdue,
|
||||
overdue_cost: totalOverdueCost,
|
||||
overdue_non_recurring: overdueNonRecurring,
|
||||
estimated_schedule_density: padBuckets(
|
||||
scheduleDensityBuckets,
|
||||
|
@@ -470,37 +457,40 @@ export interface WorkloadAggregationResponse {
|
|||
taskType: TaskTypeAggregation;
|
||||
schedule: ScheduleAggregation;
|
||||
idleTasks: IdleTasksAggregation;
|
||||
nonRecurringTasks: { doc_count: number; taskType: TaskTypeAggregation };
|
||||
ownerIds: { ownerIds: { value: number } };
|
||||
nonRecurringTasks: {
|
||||
doc_count: number;
|
||||
};
|
||||
ownerIds: {
|
||||
ownerIds: {
|
||||
value: number;
|
||||
};
|
||||
};
|
||||
[otherAggs: string]: estypes.AggregationsAggregate;
|
||||
}
|
||||
|
||||
export type TaskTypeWithStatusBucket = TaskTypeBucket & {
|
||||
status: {
|
||||
buckets: Array<{
|
||||
doc_count: number;
|
||||
key: string | number;
|
||||
}>;
|
||||
doc_count_error_upper_bound?: number | undefined;
|
||||
sum_other_doc_count?: number | undefined;
|
||||
};
|
||||
};
|
||||
|
||||
export interface TaskTypeBucket {
|
||||
doc_count: number;
|
||||
key: string | number;
|
||||
}
|
||||
|
||||
// @ts-expect-error key doesn't accept a string
|
||||
export interface TaskTypeAggregation extends estypes.AggregationsFiltersAggregate {
|
||||
buckets: Array<TaskTypeBucket | TaskTypeWithStatusBucket>;
|
||||
buckets: Array<{
|
||||
doc_count: number;
|
||||
key: string | number;
|
||||
status: {
|
||||
buckets: Array<{
|
||||
doc_count: number;
|
||||
key: string | number;
|
||||
}>;
|
||||
doc_count_error_upper_bound?: number | undefined;
|
||||
sum_other_doc_count?: number | undefined;
|
||||
};
|
||||
}>;
|
||||
doc_count_error_upper_bound?: number | undefined;
|
||||
sum_other_doc_count?: number | undefined;
|
||||
}
|
||||
|
||||
// @ts-expect-error key doesn't accept a string
|
||||
export interface ScheduleAggregation extends estypes.AggregationsFiltersAggregate {
|
||||
buckets: Array<{ doc_count: number; key: string | number }>;
|
||||
buckets: Array<{
|
||||
doc_count: number;
|
||||
key: string | number;
|
||||
}>;
|
||||
doc_count_error_upper_bound?: number | undefined;
|
||||
sum_other_doc_count?: number | undefined;
|
||||
}
|
||||
|
@ -528,8 +518,9 @@ export interface IdleTasksAggregation extends estypes.AggregationsFiltersAggrega
|
|||
};
|
||||
overdue: {
|
||||
doc_count: number;
|
||||
nonRecurring: { doc_count: number };
|
||||
taskTypes: TaskTypeAggregation;
|
||||
nonRecurring: {
|
||||
doc_count: number;
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
|
@@ -546,11 +537,3 @@ interface DateRangeBucket {
|
|||
from_as_string?: string;
|
||||
doc_count: number;
|
||||
}
|
||||
|
||||
function getTotalCost(taskTypeBuckets: TaskTypeBucket[], definitions: TaskTypeDictionary): number {
|
||||
let cost = 0;
|
||||
for (const bucket of taskTypeBuckets) {
|
||||
cost += bucket.doc_count * definitions.get(bucket.key as string)?.cost ?? TaskCost.Normal;
|
||||
}
|
||||
return cost;
|
||||
}
|
||||
|
|
|
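To make the cost figures in the workload tests above concrete: the aggregator multiplies each task type's document count by that type's configured cost. A minimal, illustrative sketch (not part of this patch) of that aggregation, assuming the TaskCost values used elsewhere in the patch (Tiny = 1, Normal = 2, ExtraLarge = 10) and the per-type costs implied by the test fixtures (bar: 1, foo: 2, report: 10):

// Illustrative sketch only; mirrors the removed getTotalCost/cost summation above.
enum TaskCost {
  Tiny = 1,
  Normal = 2,
  ExtraLarge = 10,
}

interface TaskTypeBucket {
  key: string;
  doc_count: number;
}

// Per-type costs assumed from the test fixtures in this patch.
const costByType: Record<string, TaskCost> = {
  bar: TaskCost.Tiny,
  foo: TaskCost.Normal,
  report: TaskCost.ExtraLarge,
};

function getTotalCost(buckets: TaskTypeBucket[]): number {
  let cost = 0;
  for (const bucket of buckets) {
    // unknown task types fall back to the Normal cost
    cost += bucket.doc_count * (costByType[bucket.key] ?? TaskCost.Normal);
  }
  return cost;
}

// 1 x 1 (bar) + 2 x 2 (foo) + 1 x 10 (report) = 15, matching the expected workload cost in the tests.
console.log(
  getTotalCost([
    { key: 'bar', doc_count: 1 },
    { key: 'foo', doc_count: 2 },
    { key: 'report', doc_count: 1 },
  ])
);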
@@ -11,8 +11,6 @@ import { TaskManagerConfig } from './config';
|
|||
import { Subject } from 'rxjs';
|
||||
import { bufferCount, take } from 'rxjs';
|
||||
import { CoreStatus, ServiceStatusLevels } from '@kbn/core/server';
|
||||
import { serverlessPluginMock } from '@kbn/serverless/server/mocks';
|
||||
import { cloudMock } from '@kbn/cloud-plugin/public/mocks';
|
||||
import { taskPollingLifecycleMock } from './polling_lifecycle.mock';
|
||||
import { TaskPollingLifecycle } from './polling_lifecycle';
|
||||
import type { TaskPollingLifecycle as TaskPollingLifecycleClass } from './polling_lifecycle';
|
||||
|
@@ -40,6 +38,7 @@ jest.mock('./ephemeral_task_lifecycle', () => {
|
|||
|
||||
const coreStart = coreMock.createStart();
|
||||
const pluginInitializerContextParams = {
|
||||
max_workers: 10,
|
||||
max_attempts: 9,
|
||||
poll_interval: 3000,
|
||||
version_conflict_threshold: 80,
|
||||
|
@@ -149,10 +148,7 @@ describe('TaskManagerPlugin', () => {
|
|||
pluginInitializerContext.node.roles.backgroundTasks = true;
|
||||
const taskManagerPlugin = new TaskManagerPlugin(pluginInitializerContext);
|
||||
taskManagerPlugin.setup(coreMock.createSetup(), { usageCollection: undefined });
|
||||
taskManagerPlugin.start(coreStart, {
|
||||
serverless: serverlessPluginMock.createStartContract(),
|
||||
cloud: cloudMock.createStart(),
|
||||
});
|
||||
taskManagerPlugin.start(coreStart);
|
||||
|
||||
expect(TaskPollingLifecycle as jest.Mock<TaskPollingLifecycleClass>).toHaveBeenCalledTimes(1);
|
||||
expect(
|
||||
|
@@ -167,10 +163,7 @@ describe('TaskManagerPlugin', () => {
|
|||
pluginInitializerContext.node.roles.backgroundTasks = false;
|
||||
const taskManagerPlugin = new TaskManagerPlugin(pluginInitializerContext);
|
||||
taskManagerPlugin.setup(coreMock.createSetup(), { usageCollection: undefined });
|
||||
taskManagerPlugin.start(coreStart, {
|
||||
serverless: serverlessPluginMock.createStartContract(),
|
||||
cloud: cloudMock.createStart(),
|
||||
});
|
||||
taskManagerPlugin.start(coreStart);
|
||||
|
||||
expect(TaskPollingLifecycle as jest.Mock<TaskPollingLifecycleClass>).not.toHaveBeenCalled();
|
||||
expect(
|
||||
|
|
|
@ -18,8 +18,6 @@ import {
|
|||
ServiceStatusLevels,
|
||||
CoreStatus,
|
||||
} from '@kbn/core/server';
|
||||
import { ServerlessPluginStart } from '@kbn/serverless/server';
|
||||
import type { CloudStart } from '@kbn/cloud-plugin/server';
|
||||
import {
|
||||
registerDeleteInactiveNodesTaskDefinition,
|
||||
scheduleDeleteInactiveNodesTaskDefinition,
|
||||
|
@@ -45,7 +43,6 @@ import { setupIntervalLogging } from './lib/log_health_metrics';
|
|||
import { metricsStream, Metrics } from './metrics';
|
||||
import { TaskManagerMetricsCollector } from './metrics/task_metrics_collector';
|
||||
import { TaskPartitioner } from './lib/task_partitioner';
|
||||
import { getDefaultCapacity } from './lib/get_default_capacity';
|
||||
|
||||
export interface TaskManagerSetupContract {
|
||||
/**
|
||||
|
@@ -79,11 +76,6 @@ export type TaskManagerStartContract = Pick<
|
|||
getRegisteredTypes: () => string[];
|
||||
};
|
||||
|
||||
export interface TaskManagerPluginStart {
|
||||
cloud?: CloudStart;
|
||||
serverless?: ServerlessPluginStart;
|
||||
}
|
||||
|
||||
const LogHealthForBackgroundTasksOnlyMinutes = 60;
|
||||
|
||||
export class TaskManagerPlugin
|
||||
|
@@ -107,7 +99,6 @@ export class TaskManagerPlugin
|
|||
private taskManagerMetricsCollector?: TaskManagerMetricsCollector;
|
||||
private nodeRoles: PluginInitializerContext['node']['roles'];
|
||||
private kibanaDiscoveryService?: KibanaDiscoveryService;
|
||||
private heapSizeLimit: number = 0;
|
||||
|
||||
constructor(private readonly initContext: PluginInitializerContext) {
|
||||
this.initContext = initContext;
|
||||
|
@@ -131,13 +122,6 @@ export class TaskManagerPlugin
|
|||
): TaskManagerSetupContract {
|
||||
this.elasticsearchAndSOAvailability$ = getElasticsearchAndSOAvailability(core.status.core$);
|
||||
|
||||
core.metrics
|
||||
.getOpsMetrics$()
|
||||
.pipe(distinctUntilChanged())
|
||||
.subscribe((metrics) => {
|
||||
this.heapSizeLimit = metrics.process.memory.heap.size_limit;
|
||||
});
|
||||
|
||||
setupSavedObjects(core.savedObjects, this.config);
|
||||
this.taskManagerId = this.initContext.env.instanceUuid;
|
||||
|
||||
|
@@ -248,10 +232,12 @@ export class TaskManagerPlugin
|
|||
};
|
||||
}
|
||||
|
||||
public start(
|
||||
{ savedObjects, elasticsearch, executionContext, docLinks }: CoreStart,
|
||||
{ cloud, serverless }: TaskManagerPluginStart
|
||||
): TaskManagerStartContract {
|
||||
public start({
|
||||
savedObjects,
|
||||
elasticsearch,
|
||||
executionContext,
|
||||
docLinks,
|
||||
}: CoreStart): TaskManagerStartContract {
|
||||
const savedObjectsRepository = savedObjects.createInternalRepository([
|
||||
TASK_SO_NAME,
|
||||
BACKGROUND_TASK_NODE_SO_NAME,
|
||||
|
@@ -281,29 +267,11 @@ export class TaskManagerPlugin
|
|||
requestTimeouts: this.config.request_timeouts,
|
||||
});
|
||||
|
||||
const defaultCapacity = getDefaultCapacity({
|
||||
claimStrategy: this.config?.claim_strategy,
|
||||
heapSizeLimit: this.heapSizeLimit,
|
||||
isCloud: cloud?.isCloudEnabled ?? false,
|
||||
isServerless: !!serverless,
|
||||
isBackgroundTaskNodeOnly: this.isNodeBackgroundTasksOnly(),
|
||||
});
|
||||
|
||||
this.logger.info(
|
||||
`Task manager isCloud=${
|
||||
cloud?.isCloudEnabled ?? false
|
||||
} isServerless=${!!serverless} claimStrategy=${
|
||||
this.config!.claim_strategy
|
||||
} isBackgroundTaskNodeOnly=${this.isNodeBackgroundTasksOnly()} heapSizeLimit=${
|
||||
this.heapSizeLimit
|
||||
} defaultCapacity=${defaultCapacity}`
|
||||
);
|
||||
|
||||
const managedConfiguration = createManagedConfiguration({
|
||||
config: this.config!,
|
||||
errors$: taskStore.errors$,
|
||||
defaultCapacity,
|
||||
logger: this.logger,
|
||||
errors$: taskStore.errors$,
|
||||
startingMaxWorkers: this.config!.max_workers,
|
||||
startingPollInterval: this.config!.poll_interval,
|
||||
});
|
||||
|
||||
// Only poll for tasks if configured to run tasks
|
||||
|
@@ -342,17 +310,16 @@ export class TaskManagerPlugin
|
|||
});
|
||||
}
|
||||
|
||||
createMonitoringStats({
|
||||
createMonitoringStats(
|
||||
taskStore,
|
||||
elasticsearchAndSOAvailability$: this.elasticsearchAndSOAvailability$!,
|
||||
config: this.config!,
|
||||
managedConfig: managedConfiguration,
|
||||
logger: this.logger,
|
||||
adHocTaskCounter: this.adHocTaskCounter,
|
||||
taskDefinitions: this.definitions,
|
||||
taskPollingLifecycle: this.taskPollingLifecycle,
|
||||
ephemeralTaskLifecycle: this.ephemeralTaskLifecycle,
|
||||
}).subscribe((stat) => this.monitoringStats$.next(stat));
|
||||
this.elasticsearchAndSOAvailability$!,
|
||||
this.config!,
|
||||
managedConfiguration,
|
||||
this.logger,
|
||||
this.adHocTaskCounter,
|
||||
this.taskPollingLifecycle,
|
||||
this.ephemeralTaskLifecycle
|
||||
).subscribe((stat) => this.monitoringStats$.next(stat));
|
||||
|
||||
metricsStream({
|
||||
config: this.config!,
|
||||
|
|
|
@@ -22,10 +22,10 @@ describe('delayOnClaimConflicts', () => {
|
|||
'initializes with a delay of 0',
|
||||
fakeSchedulers(async () => {
|
||||
const pollInterval = 100;
|
||||
const capacity = 10;
|
||||
const maxWorkers = 10;
|
||||
const taskLifecycleEvents$ = new Subject<TaskLifecycleEvent>();
|
||||
const delays = delayOnClaimConflicts(
|
||||
of(capacity),
|
||||
of(maxWorkers),
|
||||
of(pollInterval),
|
||||
taskLifecycleEvents$,
|
||||
80,
|
||||
|
@@ -42,11 +42,11 @@ describe('delayOnClaimConflicts', () => {
|
|||
'emits a random delay whenever p50 of claim clashes exceed 80% of available max_workers',
|
||||
fakeSchedulers(async () => {
|
||||
const pollInterval = 100;
|
||||
const capacity = 10;
|
||||
const maxWorkers = 10;
|
||||
const taskLifecycleEvents$ = new Subject<TaskLifecycleEvent>();
|
||||
|
||||
const delays$ = firstValueFrom<number[]>(
|
||||
delayOnClaimConflicts(of(capacity), of(pollInterval), taskLifecycleEvents$, 80, 2).pipe(
|
||||
delayOnClaimConflicts(of(maxWorkers), of(pollInterval), taskLifecycleEvents$, 80, 2).pipe(
|
||||
take(2),
|
||||
bufferCount(2)
|
||||
)
|
||||
|
@@ -60,6 +60,7 @@ describe('delayOnClaimConflicts', () => {
|
|||
tasksUpdated: 0,
|
||||
tasksConflicted: 8,
|
||||
tasksClaimed: 0,
|
||||
tasksRejected: 0,
|
||||
},
|
||||
docs: [],
|
||||
})
|
||||
|
@@ -93,6 +94,7 @@ describe('delayOnClaimConflicts', () => {
|
|||
tasksUpdated: 0,
|
||||
tasksConflicted: 8,
|
||||
tasksClaimed: 0,
|
||||
tasksRejected: 0,
|
||||
},
|
||||
docs: [],
|
||||
})
|
||||
|
@@ -109,6 +111,7 @@ describe('delayOnClaimConflicts', () => {
|
|||
tasksUpdated: 0,
|
||||
tasksConflicted: 10,
|
||||
tasksClaimed: 0,
|
||||
tasksRejected: 0,
|
||||
},
|
||||
docs: [],
|
||||
})
|
||||
|
@@ -134,14 +137,18 @@ describe('delayOnClaimConflicts', () => {
|
|||
'doesnt emit a new delay when conflicts have reduced',
|
||||
fakeSchedulers(async () => {
|
||||
const pollInterval = 100;
|
||||
const capacity = 10;
|
||||
const maxWorkers = 10;
|
||||
const taskLifecycleEvents$ = new Subject<TaskLifecycleEvent>();
|
||||
|
||||
const handler = jest.fn();
|
||||
|
||||
delayOnClaimConflicts(of(capacity), of(pollInterval), taskLifecycleEvents$, 80, 2).subscribe(
|
||||
handler
|
||||
);
|
||||
delayOnClaimConflicts(
|
||||
of(maxWorkers),
|
||||
of(pollInterval),
|
||||
taskLifecycleEvents$,
|
||||
80,
|
||||
2
|
||||
).subscribe(handler);
|
||||
|
||||
await sleep(0);
|
||||
expect(handler).toHaveBeenCalledWith(0);
|
||||
|
@@ -154,6 +161,7 @@ describe('delayOnClaimConflicts', () => {
|
|||
tasksUpdated: 0,
|
||||
tasksConflicted: 8,
|
||||
tasksClaimed: 0,
|
||||
tasksRejected: 0,
|
||||
},
|
||||
docs: [],
|
||||
})
|
||||
|
@@ -174,6 +182,7 @@ describe('delayOnClaimConflicts', () => {
|
|||
tasksUpdated: 0,
|
||||
tasksConflicted: 7,
|
||||
tasksClaimed: 0,
|
||||
tasksRejected: 0,
|
||||
},
|
||||
docs: [],
|
||||
})
|
||||
|
@@ -192,6 +201,7 @@ describe('delayOnClaimConflicts', () => {
|
|||
tasksUpdated: 0,
|
||||
tasksConflicted: 9,
|
||||
tasksClaimed: 0,
|
||||
tasksRejected: 0,
|
||||
},
|
||||
docs: [],
|
||||
})
|
||||
|
|
|
@@ -19,14 +19,13 @@ import { ManagedConfiguration } from '../lib/create_managed_configuration';
|
|||
import { TaskLifecycleEvent } from '../polling_lifecycle';
|
||||
import { isTaskPollingCycleEvent } from '../task_events';
|
||||
import { ClaimAndFillPoolResult } from '../lib/fill_pool';
|
||||
import { createRunningAveragedStat } from '../monitoring/task_run_calculators';
|
||||
import { getCapacityInWorkers } from '../task_pool';
|
||||
import { createRunningAveragedStat } from '../monitoring/task_run_calcultors';
|
||||
|
||||
/**
|
||||
* Emits a delay amount in ms to apply to polling whenever the task store exceeds a threshold of claim claimClashes
|
||||
*/
|
||||
export function delayOnClaimConflicts(
|
||||
capacityConfiguration$: ManagedConfiguration['capacityConfiguration$'],
|
||||
maxWorkersConfiguration$: ManagedConfiguration['maxWorkersConfiguration$'],
|
||||
pollIntervalConfiguration$: ManagedConfiguration['pollIntervalConfiguration$'],
|
||||
taskLifecycleEvents$: Observable<TaskLifecycleEvent>,
|
||||
claimClashesPercentageThreshold: number,
|
||||
|
@@ -38,7 +37,7 @@ export function delayOnClaimConflicts(
|
|||
merge(
|
||||
of(0),
|
||||
combineLatest([
|
||||
capacityConfiguration$,
|
||||
maxWorkersConfiguration$,
|
||||
pollIntervalConfiguration$,
|
||||
taskLifecycleEvents$.pipe(
|
||||
map<TaskLifecycleEvent, Option<number>>((taskEvent: TaskLifecycleEvent) =>
|
||||
|
@ -52,10 +51,7 @@ export function delayOnClaimConflicts(
|
|||
map((claimClashes: Option<number>) => (claimClashes as Some<number>).value)
|
||||
),
|
||||
]).pipe(
|
||||
map(([capacity, pollInterval, latestClaimConflicts]) => {
|
||||
// convert capacity to maxWorkers
|
||||
const maxWorkers = getCapacityInWorkers(capacity);
|
||||
|
||||
map(([maxWorkers, pollInterval, latestClaimConflicts]) => {
|
||||
// add latest claimConflict count to queue
|
||||
claimConflictQueue(latestClaimConflicts);
|
||||
|
||||
|
|
|
@ -20,8 +20,6 @@ import { asOk, Err, isErr, isOk, Result } from './lib/result_type';
|
|||
import { FillPoolResult } from './lib/fill_pool';
|
||||
import { ElasticsearchResponseError } from './lib/identify_es_error';
|
||||
import { executionContextServiceMock } from '@kbn/core/server/mocks';
|
||||
import { TaskCost } from './task';
|
||||
import { CLAIM_STRATEGY_MGET } from './config';
|
||||
import { TaskPartitioner } from './lib/task_partitioner';
|
||||
import { KibanaDiscoveryService } from './kibana_discovery_service';
|
||||
|
||||
|
@@ -46,6 +44,7 @@ describe('TaskPollingLifecycle', () => {
|
|||
const taskManagerOpts = {
|
||||
config: {
|
||||
enabled: true,
|
||||
max_workers: 10,
|
||||
index: 'foo',
|
||||
max_attempts: 9,
|
||||
poll_interval: 6000000,
|
||||
|
@@ -91,8 +90,7 @@ describe('TaskPollingLifecycle', () => {
|
|||
unusedTypes: [],
|
||||
definitions: new TaskTypeDictionary(taskManagerLogger),
|
||||
middleware: createInitialMiddleware(),
|
||||
startingCapacity: 20,
|
||||
capacityConfiguration$: of(20),
|
||||
maxWorkersConfiguration$: of(100),
|
||||
pollIntervalConfiguration$: of(100),
|
||||
executionContext,
|
||||
taskPartitioner: new TaskPartitioner('test', {} as KibanaDiscoveryService),
|
||||
|
@@ -107,23 +105,12 @@ describe('TaskPollingLifecycle', () => {
|
|||
afterEach(() => clock.restore());
|
||||
|
||||
describe('start', () => {
|
||||
taskManagerOpts.definitions.registerTaskDefinitions({
|
||||
report: {
|
||||
title: 'report',
|
||||
maxConcurrency: 1,
|
||||
cost: TaskCost.ExtraLarge,
|
||||
createTaskRunner: jest.fn(),
|
||||
},
|
||||
quickReport: {
|
||||
title: 'quickReport',
|
||||
maxConcurrency: 5,
|
||||
createTaskRunner: jest.fn(),
|
||||
},
|
||||
});
|
||||
|
||||
test('begins polling once the ES and SavedObjects services are available', () => {
|
||||
const elasticsearchAndSOAvailability$ = new Subject<boolean>();
|
||||
new TaskPollingLifecycle({ ...taskManagerOpts, elasticsearchAndSOAvailability$ });
|
||||
new TaskPollingLifecycle({
|
||||
...taskManagerOpts,
|
||||
elasticsearchAndSOAvailability$,
|
||||
});
|
||||
|
||||
clock.tick(150);
|
||||
expect(mockTaskClaiming.claimAvailableTasksIfCapacityIsAvailable).not.toHaveBeenCalled();
|
||||
|
@@ -134,70 +121,55 @@ describe('TaskPollingLifecycle', () => {
|
|||
expect(mockTaskClaiming.claimAvailableTasksIfCapacityIsAvailable).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test('provides TaskClaiming with the capacity available when strategy = CLAIM_STRATEGY_DEFAULT', () => {
|
||||
test('provides TaskClaiming with the capacity available', () => {
|
||||
const elasticsearchAndSOAvailability$ = new Subject<boolean>();
|
||||
const capacity$ = new Subject<number>();
|
||||
const maxWorkers$ = new Subject<number>();
|
||||
taskManagerOpts.definitions.registerTaskDefinitions({
|
||||
report: {
|
||||
title: 'report',
|
||||
maxConcurrency: 1,
|
||||
createTaskRunner: jest.fn(),
|
||||
},
|
||||
quickReport: {
|
||||
title: 'quickReport',
|
||||
maxConcurrency: 5,
|
||||
createTaskRunner: jest.fn(),
|
||||
},
|
||||
});
|
||||
|
||||
new TaskPollingLifecycle({
|
||||
...taskManagerOpts,
|
||||
elasticsearchAndSOAvailability$,
|
||||
capacityConfiguration$: capacity$,
|
||||
maxWorkersConfiguration$: maxWorkers$,
|
||||
});
|
||||
|
||||
const taskClaimingGetCapacity = (TaskClaiming as jest.Mock<TaskClaimingClass>).mock
|
||||
.calls[0][0].getAvailableCapacity;
|
||||
.calls[0][0].getCapacity;
|
||||
|
||||
capacity$.next(40);
|
||||
expect(taskClaimingGetCapacity()).toEqual(40);
|
||||
maxWorkers$.next(20);
|
||||
expect(taskClaimingGetCapacity()).toEqual(20);
|
||||
expect(taskClaimingGetCapacity('report')).toEqual(1);
|
||||
expect(taskClaimingGetCapacity('quickReport')).toEqual(5);
|
||||
|
||||
capacity$.next(60);
|
||||
expect(taskClaimingGetCapacity()).toEqual(60);
|
||||
maxWorkers$.next(30);
|
||||
expect(taskClaimingGetCapacity()).toEqual(30);
|
||||
expect(taskClaimingGetCapacity('report')).toEqual(1);
|
||||
expect(taskClaimingGetCapacity('quickReport')).toEqual(5);
|
||||
|
||||
capacity$.next(4);
|
||||
expect(taskClaimingGetCapacity()).toEqual(4);
|
||||
maxWorkers$.next(2);
|
||||
expect(taskClaimingGetCapacity()).toEqual(2);
|
||||
expect(taskClaimingGetCapacity('report')).toEqual(1);
|
||||
expect(taskClaimingGetCapacity('quickReport')).toEqual(4);
|
||||
});
|
||||
|
||||
test('provides TaskClaiming with the capacity available when strategy = CLAIM_STRATEGY_MGET', () => {
|
||||
const elasticsearchAndSOAvailability$ = new Subject<boolean>();
|
||||
const capacity$ = new Subject<number>();
|
||||
|
||||
new TaskPollingLifecycle({
|
||||
...taskManagerOpts,
|
||||
config: { ...taskManagerOpts.config, claim_strategy: CLAIM_STRATEGY_MGET },
|
||||
elasticsearchAndSOAvailability$,
|
||||
capacityConfiguration$: capacity$,
|
||||
});
|
||||
|
||||
const taskClaimingGetCapacity = (TaskClaiming as jest.Mock<TaskClaimingClass>).mock
|
||||
.calls[0][0].getAvailableCapacity;
|
||||
|
||||
capacity$.next(40);
|
||||
expect(taskClaimingGetCapacity()).toEqual(80);
|
||||
expect(taskClaimingGetCapacity('report')).toEqual(10);
|
||||
expect(taskClaimingGetCapacity('quickReport')).toEqual(10);
|
||||
|
||||
capacity$.next(60);
|
||||
expect(taskClaimingGetCapacity()).toEqual(120);
|
||||
expect(taskClaimingGetCapacity('report')).toEqual(10);
|
||||
expect(taskClaimingGetCapacity('quickReport')).toEqual(10);
|
||||
|
||||
capacity$.next(4);
|
||||
expect(taskClaimingGetCapacity()).toEqual(8);
|
||||
expect(taskClaimingGetCapacity('report')).toEqual(8);
|
||||
expect(taskClaimingGetCapacity('quickReport')).toEqual(8);
|
||||
expect(taskClaimingGetCapacity('quickReport')).toEqual(2);
|
||||
});
|
||||
});
|
||||
|
||||
describe('stop', () => {
|
||||
test('stops polling once the ES and SavedObjects services become unavailable', () => {
|
||||
const elasticsearchAndSOAvailability$ = new Subject<boolean>();
|
||||
new TaskPollingLifecycle({ elasticsearchAndSOAvailability$, ...taskManagerOpts });
|
||||
new TaskPollingLifecycle({
|
||||
elasticsearchAndSOAvailability$,
|
||||
...taskManagerOpts,
|
||||
});
|
||||
|
||||
elasticsearchAndSOAvailability$.next(true);
|
||||
|
||||
|
@@ -244,7 +216,7 @@ describe('TaskPollingLifecycle', () => {
|
|||
of(
|
||||
asOk({
|
||||
docs: [],
|
||||
stats: { tasksUpdated: 0, tasksConflicted: 0, tasksClaimed: 0 },
|
||||
stats: { tasksUpdated: 0, tasksConflicted: 0, tasksClaimed: 0, tasksRejected: 0 },
|
||||
})
|
||||
)
|
||||
);
|
||||
|
@@ -326,7 +298,7 @@ describe('TaskPollingLifecycle', () => {
|
|||
of(
|
||||
asOk({
|
||||
docs: [],
|
||||
stats: { tasksUpdated: 0, tasksConflicted: 0, tasksClaimed: 0 },
|
||||
stats: { tasksUpdated: 0, tasksConflicted: 0, tasksClaimed: 0, tasksRejected: 0 },
|
||||
})
|
||||
)
|
||||
);
|
||||
|
@@ -349,55 +321,6 @@ describe('TaskPollingLifecycle', () => {
|
|||
(event: TaskLifecycleEvent) => event.id === 'workerUtilization'
|
||||
);
|
||||
});
|
||||
|
||||
const workerUtilizationEvent = emittedEvents.find(
|
||||
(event: TaskLifecycleEvent) => event.id === 'workerUtilization'
|
||||
);
|
||||
expect(workerUtilizationEvent).toEqual({
|
||||
id: 'workerUtilization',
|
||||
type: 'TASK_MANAGER_STAT',
|
||||
event: { tag: 'ok', value: 0 },
|
||||
});
|
||||
});
|
||||
|
||||
test('should set utilization to max when capacity is not fully reached but there are tasks left unclaimed', async () => {
|
||||
clock.restore();
|
||||
mockTaskClaiming.claimAvailableTasksIfCapacityIsAvailable.mockImplementation(() =>
|
||||
of(
|
||||
asOk({
|
||||
docs: [],
|
||||
stats: { tasksUpdated: 0, tasksConflicted: 0, tasksClaimed: 0, tasksLeftUnclaimed: 2 },
|
||||
})
|
||||
)
|
||||
);
|
||||
const elasticsearchAndSOAvailability$ = new Subject<boolean>();
|
||||
const taskPollingLifecycle = new TaskPollingLifecycle({
|
||||
...taskManagerOpts,
|
||||
elasticsearchAndSOAvailability$,
|
||||
});
|
||||
|
||||
const emittedEvents: TaskLifecycleEvent[] = [];
|
||||
|
||||
taskPollingLifecycle.events.subscribe((event: TaskLifecycleEvent) =>
|
||||
emittedEvents.push(event)
|
||||
);
|
||||
|
||||
elasticsearchAndSOAvailability$.next(true);
|
||||
expect(mockTaskClaiming.claimAvailableTasksIfCapacityIsAvailable).toHaveBeenCalled();
|
||||
await retryUntil('workerUtilizationEvent emitted', () => {
|
||||
return !!emittedEvents.find(
|
||||
(event: TaskLifecycleEvent) => event.id === 'workerUtilization'
|
||||
);
|
||||
});
|
||||
|
||||
const workerUtilizationEvent = emittedEvents.find(
|
||||
(event: TaskLifecycleEvent) => event.id === 'workerUtilization'
|
||||
);
|
||||
expect(workerUtilizationEvent).toEqual({
|
||||
id: 'workerUtilization',
|
||||
type: 'TASK_MANAGER_STAT',
|
||||
event: { tag: 'ok', value: 100 },
|
||||
});
|
||||
});
|
||||
|
||||
test('should emit event when polling error occurs', async () => {
|
||||
|
|
|
@@ -45,8 +45,6 @@ import { TaskClaiming } from './queries/task_claiming';
|
|||
import { ClaimOwnershipResult } from './task_claimers';
|
||||
import { TaskPartitioner } from './lib/task_partitioner';
|
||||
|
||||
const MAX_BUFFER_OPERATIONS = 100;
|
||||
|
||||
export interface ITaskEventEmitter<T> {
|
||||
get events(): Observable<T>;
|
||||
}
|
||||
|
@ -103,7 +101,7 @@ export class TaskPollingLifecycle implements ITaskEventEmitter<TaskLifecycleEven
|
|||
constructor({
|
||||
logger,
|
||||
middleware,
|
||||
capacityConfiguration$,
|
||||
maxWorkersConfiguration$,
|
||||
pollIntervalConfiguration$,
|
||||
// Elasticsearch and SavedObjects availability status
|
||||
elasticsearchAndSOAvailability$,
|
||||
|
@ -126,15 +124,13 @@ export class TaskPollingLifecycle implements ITaskEventEmitter<TaskLifecycleEven
|
|||
const emitEvent = (event: TaskLifecycleEvent) => this.events$.next(event);
|
||||
|
||||
this.bufferedStore = new BufferedTaskStore(this.store, {
|
||||
bufferMaxOperations: MAX_BUFFER_OPERATIONS,
|
||||
bufferMaxOperations: config.max_workers,
|
||||
logger,
|
||||
});
|
||||
|
||||
this.pool = new TaskPool({
|
||||
logger,
|
||||
strategy: config.claim_strategy,
|
||||
capacity$: capacityConfiguration$,
|
||||
definitions: this.definitions,
|
||||
maxWorkers$: maxWorkersConfiguration$,
|
||||
});
|
||||
this.pool.load.subscribe(emitEvent);
|
||||
|
||||
|
@ -146,7 +142,17 @@ export class TaskPollingLifecycle implements ITaskEventEmitter<TaskLifecycleEven
|
|||
definitions,
|
||||
unusedTypes,
|
||||
logger: this.logger,
|
||||
getAvailableCapacity: (taskType?: string) => this.pool.availableCapacity(taskType),
|
||||
getCapacity: (taskType?: string) =>
|
||||
taskType && this.definitions.get(taskType)?.maxConcurrency
|
||||
? Math.max(
|
||||
Math.min(
|
||||
this.pool.availableWorkers,
|
||||
this.definitions.get(taskType)!.maxConcurrency! -
|
||||
this.pool.getOccupiedWorkersByType(taskType)
|
||||
),
|
||||
0
|
||||
)
|
||||
: this.pool.availableWorkers,
|
||||
taskPartitioner,
|
||||
});
|
||||
// pipe taskClaiming events into the lifecycle event stream
|
||||
|
@ -157,7 +163,7 @@ export class TaskPollingLifecycle implements ITaskEventEmitter<TaskLifecycleEven
|
|||
let pollIntervalDelay$: Observable<number> | undefined;
|
||||
if (claimStrategy === CLAIM_STRATEGY_DEFAULT) {
|
||||
pollIntervalDelay$ = delayOnClaimConflicts(
|
||||
capacityConfiguration$,
|
||||
maxWorkersConfiguration$,
|
||||
pollIntervalConfiguration$,
|
||||
this.events$,
|
||||
config.version_conflict_threshold,
|
||||
|
@ -171,22 +177,19 @@ export class TaskPollingLifecycle implements ITaskEventEmitter<TaskLifecycleEven
|
|||
pollInterval$: pollIntervalConfiguration$,
|
||||
pollIntervalDelay$,
|
||||
getCapacity: () => {
|
||||
const capacity = this.pool.availableCapacity();
|
||||
const capacity = this.pool.availableWorkers;
|
||||
if (!capacity) {
|
||||
const usedCapacityPercentage = this.pool.usedCapacityPercentage;
|
||||
|
||||
// if there isn't capacity, emit a load event so that we can expose how often
|
||||
// high load causes the poller to skip work (work isn't called when there is no capacity)
|
||||
this.emitEvent(asTaskManagerStatEvent('load', asOk(usedCapacityPercentage)));
|
||||
this.emitEvent(asTaskManagerStatEvent('load', asOk(this.pool.workerLoad)));
|
||||
|
||||
// Emit event indicating task manager utilization
|
||||
this.emitEvent(asTaskManagerStatEvent('workerUtilization', asOk(usedCapacityPercentage)));
|
||||
this.emitEvent(asTaskManagerStatEvent('workerUtilization', asOk(this.pool.workerLoad)));
|
||||
}
|
||||
return capacity;
|
||||
},
|
||||
work: this.pollForWork,
|
||||
});
|
||||
|
||||
this.subscribeToPoller(poller.events$);
|
||||
|
||||
elasticsearchAndSOAvailability$.subscribe((areESAndSOAvailable) => {
|
||||
|
@ -259,7 +262,7 @@ export class TaskPollingLifecycle implements ITaskEventEmitter<TaskLifecycleEven
|
|||
const [result] = await Promise.all([this.pool.run(tasksToRun), ...removeTaskPromises]);
|
||||
// Emit the load after fetching tasks, giving us a good metric for evaluating how
|
||||
// busy Task manager tends to be in this Kibana instance
|
||||
this.emitEvent(asTaskManagerStatEvent('load', asOk(this.pool.usedCapacityPercentage)));
|
||||
this.emitEvent(asTaskManagerStatEvent('load', asOk(this.pool.workerLoad)));
|
||||
return result;
|
||||
}
|
||||
);
|
||||
|
@ -282,29 +285,16 @@ export class TaskPollingLifecycle implements ITaskEventEmitter<TaskLifecycleEven
|
|||
|
||||
// Emit event indicating task manager utilization % at the end of a polling cycle
|
||||
// Because there was a polling error, no tasks were claimed so this represents the number of workers busy
|
||||
this.emitEvent(
|
||||
asTaskManagerStatEvent('workerUtilization', asOk(this.pool.usedCapacityPercentage))
|
||||
);
|
||||
this.emitEvent(asTaskManagerStatEvent('workerUtilization', asOk(this.pool.workerLoad)));
|
||||
})
|
||||
)
|
||||
)
|
||||
.pipe(
|
||||
tap(
|
||||
mapOk((results: TimedFillPoolResult) => {
|
||||
mapOk(() => {
|
||||
// Emit event indicating task manager utilization % at the end of a polling cycle
|
||||
|
||||
// Get the actual utilization as a percentage
|
||||
let tmUtilization = this.pool.usedCapacityPercentage;
|
||||
|
||||
// Check whether there are any tasks left unclaimed
|
||||
// If we're not at capacity and there are unclaimed tasks, then
|
||||
// there must be high cost tasks that need to be claimed
|
||||
// Artificially inflate the utilization to represent the unclaimed load
|
||||
if (tmUtilization < 100 && (results.stats?.tasksLeftUnclaimed ?? 0) > 0) {
|
||||
tmUtilization = 100;
|
||||
}
|
||||
|
||||
this.emitEvent(asTaskManagerStatEvent('workerUtilization', asOk(tmUtilization)));
|
||||
// This represents the number of workers busy + number of tasks claimed in this cycle
|
||||
this.emitEvent(asTaskManagerStatEvent('workerUtilization', asOk(this.pool.workerLoad)));
|
||||
})
|
||||
)
|
||||
)
|
||||
|
|
|
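As a worked example of the per-type getCapacity calculation restored above (Math.max(Math.min(availableWorkers, maxConcurrency - occupied), 0)), here is a hedged, standalone sketch; the helper name is hypothetical and the numbers follow the expectations in the polling lifecycle test, where 'report' has maxConcurrency 1 and 'quickReport' has maxConcurrency 5:

// Illustrative sketch only; mirrors the restored capacity logic for types with maxConcurrency.
function capacityForType(
  availableWorkers: number,
  maxConcurrency?: number,
  occupiedWorkersOfType: number = 0
): number {
  return maxConcurrency !== undefined
    ? Math.max(Math.min(availableWorkers, maxConcurrency - occupiedWorkersOfType), 0)
    : availableWorkers;
}

// With 20 available workers: 'report' (maxConcurrency 1) gets 1, 'quickReport' (maxConcurrency 5) gets 5.
console.log(capacityForType(20, 1)); // 1
console.log(capacityForType(20, 5)); // 5
// With only 2 available workers, 'quickReport' is capped by the worker count instead.
console.log(capacityForType(2, 5)); // 2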
@@ -80,7 +80,7 @@ describe('TaskClaiming', () => {
|
|||
unusedTypes: [],
|
||||
taskStore: taskStoreMock.create({ taskManagerId: '' }),
|
||||
maxAttempts: 2,
|
||||
getAvailableCapacity: () => 10,
|
||||
getCapacity: () => 10,
|
||||
taskPartitioner,
|
||||
});
|
||||
|
||||
|
@@ -130,7 +130,7 @@ describe('TaskClaiming', () => {
|
|||
unusedTypes: [],
|
||||
taskStore: taskStoreMock.create({ taskManagerId: '' }),
|
||||
maxAttempts: 2,
|
||||
getAvailableCapacity: () => 10,
|
||||
getCapacity: () => 10,
|
||||
taskPartitioner,
|
||||
});
|
||||
|
||||
|
|
|
@@ -38,7 +38,7 @@ export interface TaskClaimingOpts {
|
|||
taskStore: TaskStore;
|
||||
maxAttempts: number;
|
||||
excludedTaskTypes: string[];
|
||||
getAvailableCapacity: (taskType?: string) => number;
|
||||
getCapacity: (taskType?: string) => number;
|
||||
taskPartitioner: TaskPartitioner;
|
||||
}
|
||||
|
||||
|
@@ -87,7 +87,7 @@ export class TaskClaiming {
|
|||
private definitions: TaskTypeDictionary;
|
||||
private events$: Subject<TaskClaim>;
|
||||
private taskStore: TaskStore;
|
||||
private getAvailableCapacity: (taskType?: string) => number;
|
||||
private getCapacity: (taskType?: string) => number;
|
||||
private logger: Logger;
|
||||
private readonly taskClaimingBatchesByType: TaskClaimingBatches;
|
||||
private readonly taskMaxAttempts: Record<string, number>;
|
||||
|
@@ -106,7 +106,7 @@ export class TaskClaiming {
|
|||
this.definitions = opts.definitions;
|
||||
this.maxAttempts = opts.maxAttempts;
|
||||
this.taskStore = opts.taskStore;
|
||||
this.getAvailableCapacity = opts.getAvailableCapacity;
|
||||
this.getCapacity = opts.getCapacity;
|
||||
this.logger = opts.logger.get('taskClaiming');
|
||||
this.taskClaimingBatchesByType = this.partitionIntoClaimingBatches(this.definitions);
|
||||
this.taskMaxAttempts = Object.fromEntries(this.normalizeMaxAttempts(this.definitions));
|
||||
|
@@ -170,13 +170,13 @@ export class TaskClaiming {
|
|||
public claimAvailableTasksIfCapacityIsAvailable(
|
||||
claimingOptions: Omit<OwnershipClaimingOpts, 'size' | 'taskTypes'>
|
||||
): Observable<Result<ClaimOwnershipResult, FillPoolResult>> {
|
||||
if (this.getAvailableCapacity()) {
|
||||
if (this.getCapacity()) {
|
||||
const opts: TaskClaimerOpts = {
|
||||
batches: this.getClaimingBatches(),
|
||||
claimOwnershipUntil: claimingOptions.claimOwnershipUntil,
|
||||
taskStore: this.taskStore,
|
||||
events$: this.events$,
|
||||
getCapacity: this.getAvailableCapacity,
|
||||
getCapacity: this.getCapacity,
|
||||
unusedTypes: this.unusedTypes,
|
||||
definitions: this.definitions,
|
||||
taskMaxAttempts: this.taskMaxAttempts,
|
||||
|
|
|
@@ -823,8 +823,7 @@ function mockHealthStats(overrides = {}) {
|
|||
configuration: {
|
||||
timestamp: new Date().toISOString(),
|
||||
value: {
|
||||
capacity: { config: 10, as_cost: 20, as_workers: 10 },
|
||||
claim_strategy: 'default',
|
||||
max_workers: 10,
|
||||
poll_interval: 3000,
|
||||
request_capacity: 1000,
|
||||
monitored_aggregated_stats_refresh_rate: 5000,
|
||||
|
@@ -842,19 +841,16 @@ function mockHealthStats(overrides = {}) {
|
|||
timestamp: new Date().toISOString(),
|
||||
value: {
|
||||
count: 4,
|
||||
cost: 8,
|
||||
task_types: {
|
||||
actions_telemetry: { count: 2, cost: 4, status: { idle: 2 } },
|
||||
alerting_telemetry: { count: 1, cost: 2, status: { idle: 1 } },
|
||||
session_cleanup: { count: 1, cost: 2, status: { idle: 1 } },
|
||||
actions_telemetry: { count: 2, status: { idle: 2 } },
|
||||
alerting_telemetry: { count: 1, status: { idle: 1 } },
|
||||
session_cleanup: { count: 1, status: { idle: 1 } },
|
||||
},
|
||||
schedule: [],
|
||||
overdue: 0,
|
||||
overdue_cost: 2,
|
||||
overdue_non_recurring: 0,
|
||||
estimatedScheduleDensity: [],
|
||||
non_recurring: 20,
|
||||
non_recurring_cost: 40,
|
||||
owner_ids: [0, 0, 0, 1, 2, 0, 0, 2, 2, 2, 1, 2, 1, 1],
|
||||
estimated_schedule_density: [],
|
||||
capacity_requirements: {
|
||||
|
|
|
@@ -16,12 +16,6 @@ export enum TaskPriority {
|
|||
Normal = 50,
|
||||
}
|
||||
|
||||
export enum TaskCost {
|
||||
Tiny = 1,
|
||||
Normal = 2,
|
||||
ExtraLarge = 10,
|
||||
}
|
||||
|
||||
/*
|
||||
* Type definitions and validations for tasks.
|
||||
*/
|
||||
|
@@ -133,10 +127,6 @@ export const taskDefinitionSchema = schema.object(
|
|||
* Priority of this task type. Defaults to "NORMAL" if not defined
|
||||
*/
|
||||
priority: schema.maybe(schema.number()),
|
||||
/**
|
||||
* Cost to run this task type. Defaults to "Normal".
|
||||
*/
|
||||
cost: schema.number({ defaultValue: TaskCost.Normal }),
|
||||
/**
|
||||
* An optional more detailed description of what this task does.
|
||||
*/
|
||||
|
@ -182,7 +172,7 @@ export const taskDefinitionSchema = schema.object(
|
|||
paramsSchema: schema.maybe(schema.any()),
|
||||
},
|
||||
{
|
||||
validate({ timeout, priority, cost }) {
|
||||
validate({ timeout, priority }) {
|
||||
if (!isInterval(timeout) || isErr(tryAsResult(() => parseIntervalAsMillisecond(timeout)))) {
|
||||
return `Invalid timeout "${timeout}". Timeout must be of the form "{number}{cadance}" where number is an integer. Example: 5m.`;
|
||||
}
|
||||
|
@ -192,12 +182,6 @@ export const taskDefinitionSchema = schema.object(
|
|||
.filter((key) => isNaN(Number(key)))
|
||||
.map((key) => `${key} => ${TaskPriority[key as keyof typeof TaskPriority]}`)}`;
|
||||
}
|
||||
|
||||
if (cost && (!isNumber(cost) || !(cost in TaskCost))) {
|
||||
return `Invalid cost "${cost}". Cost must be one of ${Object.keys(TaskCost)
|
||||
.filter((key) => isNaN(Number(key)))
|
||||
.map((key) => `${key} => ${TaskCost[key as keyof typeof TaskCost]}`)}`;
|
||||
}
|
||||
},
|
||||
}
|
||||
);
|
||||
|
|
|
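For context on the cost field that the task definition schema above no longer accepts, a minimal sketch of how a task type could have declared a custom cost; the task type, title, and runner are hypothetical, the shape is simplified, and a definition like this would be passed to registerTaskDefinitions during plugin setup:

// Illustrative sketch only; TaskCost mirrors the enum removed above (Tiny = 1, Normal = 2, ExtraLarge = 10).
enum TaskCost {
  Tiny = 1,
  Normal = 2,
  ExtraLarge = 10,
}

interface SketchTaskDefinition {
  title: string;
  timeout?: string;
  maxConcurrency?: number;
  cost?: TaskCost;
  createTaskRunner: () => { run: () => Promise<{ state: Record<string, unknown> }> };
}

// A hypothetical, resource-heavy task type declaring the (now removed) custom cost.
const demoLargeReport: SketchTaskDefinition = {
  title: 'Demo: large report',
  timeout: '20m',
  maxConcurrency: 1,
  cost: TaskCost.ExtraLarge, // counted against available capacity when tasks are claimed
  createTaskRunner: () => ({
    async run() {
      return { state: {} };
    },
  }),
};

console.log(demoLargeReport.cost); // 10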
@@ -37,7 +37,6 @@ export interface ClaimOwnershipResult {
|
|||
tasksUpdated: number;
|
||||
tasksConflicted: number;
|
||||
tasksClaimed: number;
|
||||
tasksLeftUnclaimed?: number;
|
||||
};
|
||||
docs: ConcreteTaskInstance[];
|
||||
timing?: TaskTiming;
|
||||
|
@ -62,12 +61,13 @@ export function getTaskClaimer(logger: Logger, strategy: string): TaskClaimerFn
|
|||
return claimAvailableTasksDefault;
|
||||
}
|
||||
|
||||
export function getEmptyClaimOwnershipResult(): ClaimOwnershipResult {
|
||||
export function getEmptyClaimOwnershipResult() {
|
||||
return {
|
||||
stats: {
|
||||
tasksUpdated: 0,
|
||||
tasksConflicted: 0,
|
||||
tasksClaimed: 0,
|
||||
tasksRejected: 0,
|
||||
},
|
||||
docs: [],
|
||||
};
|
||||
|
|
|
@@ -133,7 +133,7 @@ describe('TaskClaiming', () => {
|
|||
excludedTaskTypes,
|
||||
unusedTypes: unusedTaskTypes,
|
||||
maxAttempts: taskClaimingOpts.maxAttempts ?? 2,
|
||||
getAvailableCapacity: taskClaimingOpts.getAvailableCapacity ?? (() => 10),
|
||||
getCapacity: taskClaimingOpts.getCapacity ?? (() => 10),
|
||||
taskPartitioner,
|
||||
...taskClaimingOpts,
|
||||
});
|
||||
|
@@ -158,7 +158,7 @@ describe('TaskClaiming', () => {
|
|||
excludedTaskTypes?: string[];
|
||||
unusedTaskTypes?: string[];
|
||||
}) {
|
||||
const getCapacity = taskClaimingOpts.getAvailableCapacity ?? (() => 10);
|
||||
const getCapacity = taskClaimingOpts.getCapacity ?? (() => 10);
|
||||
const { taskClaiming, store } = initialiseTestClaiming({
|
||||
storeOpts,
|
||||
taskClaimingOpts,
|
||||
|
@ -447,7 +447,7 @@ if (doc['task.runAt'].size()!=0) {
|
|||
},
|
||||
taskClaimingOpts: {
|
||||
maxAttempts,
|
||||
getAvailableCapacity: (type) => {
|
||||
getCapacity: (type) => {
|
||||
switch (type) {
|
||||
case 'limitedToOne':
|
||||
case 'anotherLimitedToOne':
|
||||
|
@ -577,7 +577,7 @@ if (doc['task.runAt'].size()!=0) {
|
|||
},
|
||||
taskClaimingOpts: {
|
||||
maxAttempts,
|
||||
getAvailableCapacity: (type) => {
|
||||
getCapacity: (type) => {
|
||||
switch (type) {
|
||||
case 'limitedToTwo':
|
||||
return 2;
|
||||
|
@ -686,7 +686,7 @@ if (doc['task.runAt'].size()!=0) {
|
|||
},
|
||||
taskClaimingOpts: {
|
||||
maxAttempts,
|
||||
getAvailableCapacity: (type) => {
|
||||
getCapacity: (type) => {
|
||||
switch (type) {
|
||||
case 'limitedToOne':
|
||||
case 'anotherLimitedToOne':
|
||||
|
@ -1139,7 +1139,7 @@ if (doc['task.runAt'].size()!=0) {
|
|||
storeOpts: {
|
||||
taskManagerId,
|
||||
},
|
||||
taskClaimingOpts: { getAvailableCapacity: () => maxDocs },
|
||||
taskClaimingOpts: { getCapacity: () => maxDocs },
|
||||
claimingOpts: {
|
||||
claimOwnershipUntil,
|
||||
},
|
||||
|
@ -1219,9 +1219,9 @@ if (doc['task.runAt'].size()!=0) {
|
|||
function instantiateStoreWithMockedApiResponses({
|
||||
taskManagerId = uuidv4(),
|
||||
definitions = taskDefinitions,
|
||||
getAvailableCapacity = () => 10,
|
||||
getCapacity = () => 10,
|
||||
tasksClaimed,
|
||||
}: Partial<Pick<TaskClaimingOpts, 'definitions' | 'getAvailableCapacity'>> & {
|
||||
}: Partial<Pick<TaskClaimingOpts, 'definitions' | 'getCapacity'>> & {
|
||||
taskManagerId?: string;
|
||||
tasksClaimed?: ConcreteTaskInstance[][];
|
||||
} = {}) {
|
||||
|
@ -1254,7 +1254,7 @@ if (doc['task.runAt'].size()!=0) {
|
|||
unusedTypes: [],
|
||||
taskStore,
|
||||
maxAttempts: 2,
|
||||
getAvailableCapacity,
|
||||
getCapacity,
|
||||
taskPartitioner,
|
||||
});
|
||||
|
||||
|
|
File diff suppressed because it is too large
|
@ -7,11 +7,9 @@
|
|||
|
||||
// Basic operation of this task claimer:
|
||||
// - search for candidate tasks to run, more than we actually can run
|
||||
// - initial search returns a slimmer task document for I/O efficiency (no params or state)
|
||||
// - for each task found, do an mget to get the current seq_no and primary_term
|
||||
// - if the mget result doesn't match the search result, the task is stale
|
||||
// - from the non-stale search results, return as many as we can run based on available
|
||||
// capacity and the cost of each task type to run
|
||||
// - from the non-stale search results, return as many as we can run
|
||||
|
||||
import { SavedObjectsErrorHelpers } from '@kbn/core/server';
|
||||
|
||||
|
@@ -20,7 +18,7 @@ import { Subject, Observable } from 'rxjs';
|
|||
|
||||
import { TaskTypeDictionary } from '../task_type_dictionary';
|
||||
import { TaskClaimerOpts, ClaimOwnershipResult, getEmptyClaimOwnershipResult } from '.';
|
||||
import { ConcreteTaskInstance, TaskStatus, ConcreteTaskInstanceVersion, TaskCost } from '../task';
|
||||
import { ConcreteTaskInstance, TaskStatus, ConcreteTaskInstanceVersion } from '../task';
|
||||
import { TASK_MANAGER_TRANSACTION_TYPE } from '../task_running';
|
||||
import {
|
||||
isLimited,
|
||||
|
@ -114,10 +112,7 @@ async function claimAvailableTasks(opts: TaskClaimerOpts): Promise<ClaimOwnershi
|
|||
taskStore,
|
||||
events$,
|
||||
claimOwnershipUntil,
|
||||
// set size to accommodate the possibility of retrieving all
|
||||
// tasks with the smallest cost, with a size multipler to account
|
||||
// for possible conflicts
|
||||
size: initialCapacity * TaskCost.Tiny * SIZE_MULTIPLIER_FOR_TASK_FETCH,
|
||||
size: initialCapacity * SIZE_MULTIPLIER_FOR_TASK_FETCH,
|
||||
taskMaxAttempts,
|
||||
taskPartitioner,
|
||||
});
|
||||
|
@ -161,54 +156,35 @@ async function claimAvailableTasks(opts: TaskClaimerOpts): Promise<ClaimOwnershi
|
|||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// apply limited concurrency limits (TODO: can currently starve other tasks)
|
||||
const candidateTasks = applyLimitedConcurrency(currentTasks, batches);
|
||||
|
||||
// apply capacity constraint to candidate tasks
|
||||
const tasksToRun: ConcreteTaskInstance[] = [];
|
||||
const leftOverTasks: ConcreteTaskInstance[] = [];
|
||||
|
||||
let capacityAccumulator = 0;
|
||||
for (const task of candidateTasks) {
|
||||
const taskCost = definitions.get(task.taskType)?.cost ?? TaskCost.Normal;
|
||||
if (capacityAccumulator + taskCost <= initialCapacity) {
|
||||
tasksToRun.push(task);
|
||||
capacityAccumulator += taskCost;
|
||||
} else {
|
||||
leftOverTasks.push(task);
|
||||
capacityAccumulator = initialCapacity;
|
||||
}
|
||||
}
|
||||
|
||||
// build the updated task objects we'll claim
|
||||
const taskUpdates: ConcreteTaskInstance[] = [];
|
||||
for (const task of tasksToRun) {
|
||||
taskUpdates.push({
|
||||
...task,
|
||||
scheduledAt:
|
||||
task.retryAt != null && new Date(task.retryAt).getTime() < Date.now()
|
||||
? task.retryAt
|
||||
: task.runAt,
|
||||
status: TaskStatus.Claiming,
|
||||
retryAt: claimOwnershipUntil,
|
||||
ownerId: taskStore.taskManagerId,
|
||||
const taskUpdates: ConcreteTaskInstance[] = Array.from(candidateTasks)
|
||||
.slice(0, initialCapacity)
|
||||
.map((task) => {
|
||||
if (task.retryAt != null && new Date(task.retryAt).getTime() < Date.now()) {
|
||||
task.scheduledAt = task.retryAt;
|
||||
} else {
|
||||
task.scheduledAt = task.runAt;
|
||||
}
|
||||
task.retryAt = claimOwnershipUntil;
|
||||
task.ownerId = taskStore.taskManagerId;
|
||||
task.status = TaskStatus.Claiming;
|
||||
|
||||
return task;
|
||||
});
|
||||
}
|
||||
|
||||
// perform the task object updates, deal with errors
|
||||
const updatedTasks: ConcreteTaskInstance[] = [];
|
||||
const finalResults: ConcreteTaskInstance[] = [];
|
||||
let conflicts = staleTasks.length;
|
||||
let bulkErrors = 0;
|
||||
|
||||
try {
|
||||
const updateResults = await taskStore.bulkUpdate(taskUpdates, {
|
||||
validate: false,
|
||||
excludeLargeFields: true,
|
||||
});
|
||||
const updateResults = await taskStore.bulkUpdate(taskUpdates, { validate: false });
|
||||
for (const updateResult of updateResults) {
|
||||
if (isOk(updateResult)) {
|
||||
updatedTasks.push(updateResult.value);
|
||||
finalResults.push(updateResult.value);
|
||||
} else {
|
||||
const { id, type, error } = updateResult.error;
|
||||
|
||||
|
@ -233,27 +209,6 @@ async function claimAvailableTasks(opts: TaskClaimerOpts): Promise<ClaimOwnershi
|
|||
logger.warn(`Error updating tasks during claim: ${err}`, logMeta);
|
||||
}
|
||||
|
||||
// perform an mget to get the full task instance for claiming
|
||||
let fullTasksToRun: ConcreteTaskInstance[] = [];
|
||||
try {
|
||||
fullTasksToRun = (await taskStore.bulkGet(updatedTasks.map((task) => task.id))).reduce<
|
||||
ConcreteTaskInstance[]
|
||||
>((acc, task) => {
|
||||
if (isOk(task)) {
|
||||
acc.push(task.value);
|
||||
} else {
|
||||
const { id, type, error } = task.error;
|
||||
logger.warn(
|
||||
`Error getting full task ${id}:${type} during claim: ${error.message}`,
|
||||
logMeta
|
||||
);
|
||||
}
|
||||
return acc;
|
||||
}, []);
|
||||
} catch (err) {
|
||||
logger.warn(`Error getting full task documents during claim: ${err}`, logMeta);
|
||||
}
|
||||
|
||||
// separate update for removed tasks; shouldn't happen often, so unlikely
|
||||
// a performance concern, and keeps the rest of the logic simpler
|
||||
let removedCount = 0;
|
||||
|
@ -265,10 +220,7 @@ async function claimAvailableTasks(opts: TaskClaimerOpts): Promise<ClaimOwnershi
|
|||
|
||||
// don't worry too much about errors, we'll get them next time
|
||||
try {
|
||||
const removeResults = await taskStore.bulkUpdate(tasksToRemove, {
|
||||
validate: false,
|
||||
excludeLargeFields: true,
|
||||
});
|
||||
const removeResults = await taskStore.bulkUpdate(tasksToRemove, { validate: false });
|
||||
for (const removeResult of removeResults) {
|
||||
if (isOk(removeResult)) {
|
||||
removedCount++;
|
||||
|
@ -286,22 +238,21 @@ async function claimAvailableTasks(opts: TaskClaimerOpts): Promise<ClaimOwnershi
|
|||
}
|
||||
|
||||
// TODO: need a better way to generate stats
|
||||
const message = `task claimer claimed: ${fullTasksToRun.length}; stale: ${staleTasks.length}; conflicts: ${conflicts}; missing: ${missingTasks.length}; capacity reached: ${leftOverTasks.length}; updateErrors: ${bulkErrors}; removed: ${removedCount};`;
|
||||
const message = `task claimer claimed: ${finalResults.length}; stale: ${staleTasks.length}; conflicts: ${conflicts}; missing: ${missingTasks.length}; updateErrors: ${bulkErrors}; removed: ${removedCount};`;
|
||||
logger.debug(message, logMeta);
|
||||
|
||||
// build results
|
||||
const finalResult = {
|
||||
stats: {
|
||||
tasksUpdated: fullTasksToRun.length,
|
||||
tasksUpdated: finalResults.length,
|
||||
tasksConflicted: conflicts,
|
||||
tasksClaimed: fullTasksToRun.length,
|
||||
tasksLeftUnclaimed: leftOverTasks.length,
|
||||
tasksClaimed: finalResults.length,
|
||||
},
|
||||
docs: fullTasksToRun,
|
||||
docs: finalResults,
|
||||
timing: stopTaskTimer(),
|
||||
};
|
||||
|
||||
for (const doc of fullTasksToRun) {
|
||||
for (const doc of finalResults) {
|
||||
events$.next(asTaskClaimEvent(doc.id, asOk(doc), finalResult.timing));
|
||||
}
|
||||
|
||||
|
@ -345,16 +296,12 @@ async function searchAvailableTasks({
|
|||
tasksWithPartitions(partitions)
|
||||
);
|
||||
|
||||
return await taskStore.fetch(
|
||||
{
|
||||
query,
|
||||
sort,
|
||||
size,
|
||||
seq_no_primary_term: true,
|
||||
},
|
||||
// limit the response size
|
||||
true
|
||||
);
|
||||
return await taskStore.fetch({
|
||||
query,
|
||||
sort,
|
||||
size,
|
||||
seq_no_primary_term: true,
|
||||
});
|
||||
}
|
||||
|
||||
function applyLimitedConcurrency(
|
||||
|
|
|
@ -8,14 +8,16 @@ import { TaskPool } from './task_pool';
|
|||
|
||||
const defaultGetCapacityOverride: () => Partial<{
|
||||
load: number;
|
||||
usedCapacity: number;
|
||||
usedCapacityPercentage: number;
|
||||
availableCapacity: number;
|
||||
occupiedWorkers: number;
|
||||
workerLoad: number;
|
||||
max: number;
|
||||
availableWorkers: number;
|
||||
}> = () => ({
|
||||
load: 0,
|
||||
usedCapacity: 0,
|
||||
usedCapacityPercentage: 0,
|
||||
availableCapacity: 20,
|
||||
occupiedWorkers: 0,
|
||||
workerLoad: 0,
|
||||
max: 10,
|
||||
availableWorkers: 10,
|
||||
});
|
||||
|
||||
const createTaskPoolMock = (getCapacityOverride = defaultGetCapacityOverride) => {
|
||||
|
@ -23,16 +25,19 @@ const createTaskPoolMock = (getCapacityOverride = defaultGetCapacityOverride) =>
|
|||
get load() {
|
||||
return getCapacityOverride().load ?? 0;
|
||||
},
|
||||
get usedCapacity() {
|
||||
return getCapacityOverride().usedCapacity ?? 0;
|
||||
get occupiedWorkers() {
|
||||
return getCapacityOverride().occupiedWorkers ?? 0;
|
||||
},
|
||||
get usedCapacityPercentage() {
|
||||
return getCapacityOverride().usedCapacityPercentage ?? 0;
|
||||
get workerLoad() {
|
||||
return getCapacityOverride().workerLoad ?? 0;
|
||||
},
|
||||
availableCapacity() {
|
||||
return getCapacityOverride().availableCapacity ?? 20;
|
||||
get max() {
|
||||
return getCapacityOverride().max ?? 10;
|
||||
},
|
||||
getUsedCapacityByType: jest.fn(),
|
||||
get availableWorkers() {
|
||||
return getCapacityOverride().availableWorkers ?? 10;
|
||||
},
|
||||
getOccupiedWorkersByType: jest.fn(),
|
||||
run: jest.fn(),
|
||||
cancelRunningTasks: jest.fn(),
|
||||
} as unknown as jest.Mocked<TaskPool>;
|
x-pack/plugins/task_manager/server/task_pool.test.ts (new file, 471 lines)
@ -0,0 +1,471 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
import sinon from 'sinon';
|
||||
import { of, Subject } from 'rxjs';
|
||||
import { TaskPool, TaskPoolRunResult } from './task_pool';
|
||||
import { resolvable, sleep } from './test_utils';
|
||||
import { loggingSystemMock } from '@kbn/core/server/mocks';
|
||||
import { Logger } from '@kbn/core/server';
|
||||
import { asOk } from './lib/result_type';
|
||||
import { SavedObjectsErrorHelpers } from '@kbn/core/server';
|
||||
import moment from 'moment';
|
||||
import { v4 as uuidv4 } from 'uuid';
|
||||
import { TaskRunningStage } from './task_running';
|
||||
|
||||
describe('TaskPool', () => {
|
||||
beforeEach(() => {
|
||||
jest.useFakeTimers();
|
||||
jest.setSystemTime(new Date(2021, 12, 30));
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
jest.useRealTimers();
|
||||
});
|
||||
|
||||
test('occupiedWorkers are a sum of running tasks', async () => {
|
||||
const pool = new TaskPool({
|
||||
maxWorkers$: of(200),
|
||||
logger: loggingSystemMock.create().get(),
|
||||
});
|
||||
|
||||
const result = await pool.run([{ ...mockTask() }, { ...mockTask() }, { ...mockTask() }]);
|
||||
|
||||
expect(result).toEqual(TaskPoolRunResult.RunningAllClaimedTasks);
|
||||
expect(pool.occupiedWorkers).toEqual(3);
|
||||
});
|
||||
|
||||
test('availableWorkers are a function of total_capacity - occupiedWorkers', async () => {
|
||||
const pool = new TaskPool({
|
||||
maxWorkers$: of(10),
|
||||
logger: loggingSystemMock.create().get(),
|
||||
});
|
||||
|
||||
const result = await pool.run([{ ...mockTask() }, { ...mockTask() }, { ...mockTask() }]);
|
||||
|
||||
expect(result).toEqual(TaskPoolRunResult.RunningAllClaimedTasks);
|
||||
expect(pool.availableWorkers).toEqual(7);
|
||||
});
|
||||
|
||||
test('availableWorkers is 0 until maxWorkers$ pushes a value', async () => {
|
||||
const maxWorkers$ = new Subject<number>();
|
||||
const pool = new TaskPool({
|
||||
maxWorkers$,
|
||||
logger: loggingSystemMock.create().get(),
|
||||
});
|
||||
|
||||
expect(pool.availableWorkers).toEqual(0);
|
||||
maxWorkers$.next(10);
|
||||
expect(pool.availableWorkers).toEqual(10);
|
||||
});
|
||||
|
||||
test('does not run tasks that are beyond its available capacity', async () => {
|
||||
const pool = new TaskPool({
|
||||
maxWorkers$: of(2),
|
||||
logger: loggingSystemMock.create().get(),
|
||||
});
|
||||
|
||||
const shouldRun = mockRun();
|
||||
const shouldNotRun = mockRun();
|
||||
|
||||
const result = await pool.run([
|
||||
{ ...mockTask(), run: shouldRun },
|
||||
{ ...mockTask(), run: shouldRun },
|
||||
{ ...mockTask(), run: shouldNotRun },
|
||||
]);
|
||||
|
||||
expect(result).toEqual(TaskPoolRunResult.RanOutOfCapacity);
|
||||
expect(pool.availableWorkers).toEqual(0);
|
||||
expect(shouldRun).toHaveBeenCalledTimes(2);
|
||||
expect(shouldNotRun).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test('should log when marking a Task as running fails', async () => {
|
||||
const logger = loggingSystemMock.create().get();
|
||||
const pool = new TaskPool({
|
||||
maxWorkers$: of(2),
|
||||
logger,
|
||||
});
|
||||
|
||||
const taskFailedToMarkAsRunning = mockTask();
|
||||
taskFailedToMarkAsRunning.markTaskAsRunning.mockImplementation(async () => {
|
||||
throw new Error(`Mark Task as running has failed miserably`);
|
||||
});
|
||||
|
||||
const result = await pool.run([mockTask(), taskFailedToMarkAsRunning, mockTask()]);
|
||||
|
||||
expect((logger as jest.Mocked<Logger>).error.mock.calls[0]).toMatchInlineSnapshot(`
|
||||
Array [
|
||||
"Failed to mark Task TaskType \\"shooooo\\" as running: Mark Task as running has failed miserably",
|
||||
]
|
||||
`);
|
||||
|
||||
expect(result).toEqual(TaskPoolRunResult.RunningAtCapacity);
|
||||
});
|
||||
|
||||
test('should log when running a Task fails', async () => {
|
||||
const logger = loggingSystemMock.create().get();
|
||||
const pool = new TaskPool({
|
||||
maxWorkers$: of(3),
|
||||
logger,
|
||||
});
|
||||
|
||||
const taskFailedToRun = mockTask();
|
||||
taskFailedToRun.run.mockImplementation(async () => {
|
||||
throw new Error(`Run Task has failed miserably`);
|
||||
});
|
||||
|
||||
const result = await pool.run([mockTask(), taskFailedToRun, mockTask()]);
|
||||
|
||||
expect((logger as jest.Mocked<Logger>).warn.mock.calls[0]).toMatchInlineSnapshot(`
|
||||
Array [
|
||||
"Task TaskType \\"shooooo\\" failed in attempt to run: Run Task has failed miserably",
|
||||
]
|
||||
`);
|
||||
|
||||
expect(result).toEqual(TaskPoolRunResult.RunningAllClaimedTasks);
|
||||
});
|
||||
|
||||
test('should not log when running a Task fails due to the Task SO having been deleted while in flight', async () => {
|
||||
const logger = loggingSystemMock.create().get();
|
||||
const pool = new TaskPool({
|
||||
maxWorkers$: of(3),
|
||||
logger,
|
||||
});
|
||||
|
||||
const taskFailedToRun = mockTask();
|
||||
taskFailedToRun.run.mockImplementation(async () => {
|
||||
throw SavedObjectsErrorHelpers.createGenericNotFoundError('task', taskFailedToRun.id);
|
||||
});
|
||||
|
||||
const result = await pool.run([mockTask(), taskFailedToRun, mockTask()]);
|
||||
|
||||
expect(logger.debug).toHaveBeenCalledWith(
|
||||
`Task TaskType "shooooo" failed in attempt to run: Saved object [task/${taskFailedToRun.id}] not found`
|
||||
);
|
||||
expect(logger.warn).not.toHaveBeenCalled();
|
||||
|
||||
expect(result).toEqual(TaskPoolRunResult.RunningAllClaimedTasks);
|
||||
});
|
||||
|
||||
test('Running a task which fails still takes up capacity', async () => {
|
||||
const logger = loggingSystemMock.create().get();
|
||||
const pool = new TaskPool({
|
||||
maxWorkers$: of(1),
|
||||
logger,
|
||||
});
|
||||
|
||||
const taskFailedToRun = mockTask();
|
||||
taskFailedToRun.run.mockImplementation(async () => {
|
||||
await sleep(0);
|
||||
throw new Error(`Run Task has failed miserably`);
|
||||
});
|
||||
|
||||
const result = await pool.run([taskFailedToRun, mockTask()]);
|
||||
|
||||
expect(result).toEqual(TaskPoolRunResult.RanOutOfCapacity);
|
||||
});
|
||||
|
||||
test('clears up capacity when a task completes', async () => {
|
||||
const pool = new TaskPool({
|
||||
maxWorkers$: of(1),
|
||||
logger: loggingSystemMock.create().get(),
|
||||
});
|
||||
|
||||
const firstWork = resolvable();
|
||||
const firstRun = sinon.spy(async () => {
|
||||
await sleep(0);
|
||||
firstWork.resolve();
|
||||
return asOk({ state: {} });
|
||||
});
|
||||
const secondWork = resolvable();
|
||||
const secondRun = sinon.spy(async () => {
|
||||
await sleep(0);
|
||||
secondWork.resolve();
|
||||
return asOk({ state: {} });
|
||||
});
|
||||
|
||||
const result = await pool.run([
|
||||
{ ...mockTask(), run: firstRun },
|
||||
{ ...mockTask(), run: secondRun },
|
||||
]);
|
||||
|
||||
expect(result).toEqual(TaskPoolRunResult.RanOutOfCapacity);
|
||||
expect(pool.occupiedWorkers).toEqual(1);
|
||||
expect(pool.availableWorkers).toEqual(0);
|
||||
|
||||
await firstWork;
|
||||
sinon.assert.calledOnce(firstRun);
|
||||
sinon.assert.notCalled(secondRun);
|
||||
|
||||
expect(pool.occupiedWorkers).toEqual(0);
|
||||
await pool.run([{ ...mockTask(), run: secondRun }]);
|
||||
expect(pool.occupiedWorkers).toEqual(1);
|
||||
|
||||
expect(pool.availableWorkers).toEqual(0);
|
||||
|
||||
await secondWork;
|
||||
|
||||
expect(pool.occupiedWorkers).toEqual(0);
|
||||
expect(pool.availableWorkers).toEqual(1);
|
||||
sinon.assert.calledOnce(secondRun);
|
||||
});
|
||||
|
||||
test('run cancels expired tasks prior to running new tasks', async () => {
|
||||
const logger = loggingSystemMock.create().get();
|
||||
const pool = new TaskPool({
|
||||
maxWorkers$: of(2),
|
||||
logger,
|
||||
});
|
||||
|
||||
const haltUntilWeAfterFirstRun = resolvable();
|
||||
const taskHasExpired = resolvable();
|
||||
const haltTaskSoThatItCanBeCanceled = resolvable();
|
||||
|
||||
const shouldRun = sinon.spy(() => Promise.resolve());
|
||||
const shouldNotRun = sinon.spy(() => Promise.resolve());
|
||||
const now = new Date();
|
||||
const result = await pool.run([
|
||||
{
|
||||
...mockTask({ id: '1' }),
|
||||
async run() {
|
||||
await haltUntilWeAfterFirstRun;
|
||||
this.isExpired = true;
|
||||
taskHasExpired.resolve();
|
||||
await haltTaskSoThatItCanBeCanceled;
|
||||
return asOk({ state: {} });
|
||||
},
|
||||
get expiration() {
|
||||
return now;
|
||||
},
|
||||
get startedAt() {
|
||||
// 5 and a half minutes
|
||||
return moment(now).subtract(5, 'm').subtract(30, 's').toDate();
|
||||
},
|
||||
cancel: shouldRun,
|
||||
},
|
||||
{
|
||||
...mockTask({ id: '2' }),
|
||||
async run() {
|
||||
// halt here so that we can verify that this task is counted in `occupiedWorkers`
|
||||
await haltUntilWeAfterFirstRun;
|
||||
return asOk({ state: {} });
|
||||
},
|
||||
cancel: shouldNotRun,
|
||||
},
|
||||
]);
|
||||
|
||||
expect(result).toEqual(TaskPoolRunResult.RunningAtCapacity);
|
||||
expect(pool.occupiedWorkers).toEqual(2);
|
||||
expect(pool.availableWorkers).toEqual(0);
|
||||
|
||||
// release first stage in task so that it has time to expire, but not complete
|
||||
haltUntilWeAfterFirstRun.resolve();
|
||||
await taskHasExpired;
|
||||
|
||||
expect(await pool.run([{ ...mockTask({ id: '3' }) }])).toBeTruthy();
|
||||
|
||||
sinon.assert.calledOnce(shouldRun);
|
||||
sinon.assert.notCalled(shouldNotRun);
|
||||
|
||||
expect(pool.occupiedWorkers).toEqual(1);
|
||||
expect(pool.availableWorkers).toEqual(1);
|
||||
|
||||
haltTaskSoThatItCanBeCanceled.resolve();
|
||||
|
||||
expect(logger.warn).toHaveBeenCalledWith(
|
||||
`Cancelling task TaskType "shooooo" as it expired at ${now.toISOString()} after running for 05m 30s (with timeout set at 5m).`
|
||||
);
|
||||
});
|
||||
|
||||
test('calls to availableWorkers ensures we cancel expired tasks', async () => {
|
||||
const pool = new TaskPool({
|
||||
maxWorkers$: of(1),
|
||||
logger: loggingSystemMock.create().get(),
|
||||
});
|
||||
|
||||
const taskIsRunning = resolvable();
|
||||
const taskHasExpired = resolvable();
|
||||
const cancel = sinon.spy(() => Promise.resolve());
|
||||
const now = new Date();
|
||||
expect(
|
||||
await pool.run([
|
||||
{
|
||||
...mockTask(),
|
||||
async run() {
|
||||
await sleep(10);
|
||||
this.isExpired = true;
|
||||
taskIsRunning.resolve();
|
||||
await taskHasExpired;
|
||||
return asOk({ state: {} });
|
||||
},
|
||||
get expiration() {
|
||||
return new Date(now.getTime() + 10);
|
||||
},
|
||||
get startedAt() {
|
||||
return now;
|
||||
},
|
||||
cancel,
|
||||
},
|
||||
])
|
||||
).toEqual(TaskPoolRunResult.RunningAtCapacity);
|
||||
|
||||
await taskIsRunning;
|
||||
|
||||
sinon.assert.notCalled(cancel);
|
||||
expect(pool.occupiedWorkers).toEqual(1);
|
||||
// The call to `availableWorkers` will clear the expired task so it's 1 instead of 0
|
||||
expect(pool.availableWorkers).toEqual(1);
|
||||
sinon.assert.calledOnce(cancel);
|
||||
|
||||
expect(pool.occupiedWorkers).toEqual(0);
|
||||
expect(pool.availableWorkers).toEqual(1);
|
||||
// ensure cancel isn't called twice
|
||||
sinon.assert.calledOnce(cancel);
|
||||
taskHasExpired.resolve();
|
||||
});
|
||||
|
||||
test('logs if cancellation errors', async () => {
|
||||
const logger = loggingSystemMock.create().get();
|
||||
const pool = new TaskPool({
|
||||
logger,
|
||||
maxWorkers$: of(20),
|
||||
});
|
||||
|
||||
const cancelled = resolvable();
|
||||
const result = await pool.run([
|
||||
{
|
||||
...mockTask(),
|
||||
async run() {
|
||||
this.isExpired = true;
|
||||
await sleep(10);
|
||||
return asOk({ state: {} });
|
||||
},
|
||||
async cancel() {
|
||||
cancelled.resolve();
|
||||
throw new Error('Dern!');
|
||||
},
|
||||
toString: () => '"shooooo!"',
|
||||
},
|
||||
]);
|
||||
|
||||
expect(result).toEqual(TaskPoolRunResult.RunningAllClaimedTasks);
|
||||
await pool.run([]);
|
||||
|
||||
expect(pool.occupiedWorkers).toEqual(0);
|
||||
|
||||
// Allow the task to cancel...
|
||||
await cancelled;
|
||||
|
||||
expect((logger as jest.Mocked<Logger>).error.mock.calls[0][0]).toMatchInlineSnapshot(
|
||||
`"Failed to cancel task \\"shooooo!\\": Error: Dern!"`
|
||||
);
|
||||
});
|
||||
|
||||
test('only allows one task with the same id in the task pool', async () => {
|
||||
const logger = loggingSystemMock.create().get();
|
||||
const pool = new TaskPool({
|
||||
maxWorkers$: of(2),
|
||||
logger,
|
||||
});
|
||||
|
||||
const shouldRun = mockRun();
|
||||
const shouldNotRun = mockRun();
|
||||
|
||||
const taskId = uuidv4();
|
||||
const task1 = mockTask({ id: taskId, run: shouldRun });
|
||||
const task2 = mockTask({
|
||||
id: taskId,
|
||||
run: shouldNotRun,
|
||||
isSameTask() {
|
||||
return true;
|
||||
},
|
||||
});
|
||||
|
||||
await pool.run([task1]);
|
||||
await pool.run([task2]);
|
||||
|
||||
expect(shouldRun).toHaveBeenCalledTimes(1);
|
||||
expect(shouldNotRun).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// This test is from https://github.com/elastic/kibana/issues/172116
|
||||
// It's not clear how to reproduce the actual error, but it is easy to
|
||||
// reproduce with the wacky test below. It does log the exact error
|
||||
// from that issue, without the corresponding fix in task_pool.ts
|
||||
test('works when available workers is 0 but there are tasks to run', async () => {
|
||||
const logger = loggingSystemMock.create().get();
|
||||
const pool = new TaskPool({
|
||||
maxWorkers$: of(2),
|
||||
logger,
|
||||
});
|
||||
|
||||
const shouldRun = mockRun();
|
||||
|
||||
const taskId = uuidv4();
|
||||
const task1 = mockTask({ id: taskId, run: shouldRun });
|
||||
|
||||
// we need to alternate the values of `availableWorkers`. First it
|
||||
// should be 0, then 1, then 0, then 1, etc. This will cause task_pool.run
|
||||
// to partition tasks (0 to run, everything as leftover), then at the
|
||||
// end of run(), to check if it should recurse, it should be > 0.
|
||||
let awValue = 1;
|
||||
Object.defineProperty(pool, 'availableWorkers', {
|
||||
get() {
|
||||
return ++awValue % 2;
|
||||
},
|
||||
});
|
||||
|
||||
const result = await pool.run([task1]);
|
||||
expect(result).toBe(TaskPoolRunResult.RanOutOfCapacity);
|
||||
|
||||
expect((logger as jest.Mocked<Logger>).warn.mock.calls[0]).toMatchInlineSnapshot(`
|
||||
Array [
|
||||
"task pool run attempts exceeded 3; assuming ran out of capacity; availableWorkers: 0, tasksToRun: 0, leftOverTasks: 1, maxWorkers: 2, occupiedWorkers: 0, workerLoad: 0",
|
||||
]
|
||||
`);
|
||||
});
|
||||
|
||||
function mockRun() {
|
||||
return jest.fn(async () => {
|
||||
await sleep(0);
|
||||
return asOk({ state: {} });
|
||||
});
|
||||
}
|
||||
|
||||
function mockTask(overrides = {}) {
|
||||
return {
|
||||
isExpired: false,
|
||||
taskExecutionId: uuidv4(),
|
||||
id: uuidv4(),
|
||||
cancel: async () => undefined,
|
||||
markTaskAsRunning: jest.fn(async () => true),
|
||||
run: mockRun(),
|
||||
stage: TaskRunningStage.PENDING,
|
||||
toString: () => `TaskType "shooooo"`,
|
||||
isAdHocTaskAndOutOfAttempts: false,
|
||||
removeTask: jest.fn(),
|
||||
get expiration() {
|
||||
return new Date();
|
||||
},
|
||||
get startedAt() {
|
||||
return new Date();
|
||||
},
|
||||
get definition() {
|
||||
return {
|
||||
type: '',
|
||||
title: '',
|
||||
timeout: '5m',
|
||||
createTaskRunner: jest.fn(),
|
||||
};
|
||||
},
|
||||
isSameTask() {
|
||||
return false;
|
||||
},
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
});
|
|
@ -13,20 +13,13 @@ import { Observable, Subject } from 'rxjs';
|
|||
import moment, { Duration } from 'moment';
|
||||
import { padStart } from 'lodash';
|
||||
import { Logger } from '@kbn/core/server';
|
||||
import { TaskRunner } from '../task_running';
|
||||
import { isTaskSavedObjectNotFoundError } from '../lib/is_task_not_found_error';
|
||||
import { TaskManagerStat } from '../task_events';
|
||||
import { ICapacity } from './types';
|
||||
import { CLAIM_STRATEGY_MGET } from '../config';
|
||||
import { WorkerCapacity } from './worker_capacity';
|
||||
import { CostCapacity } from './cost_capacity';
|
||||
import { TaskTypeDictionary } from '../task_type_dictionary';
|
||||
import { TaskRunner } from './task_running';
|
||||
import { isTaskSavedObjectNotFoundError } from './lib/is_task_not_found_error';
|
||||
import { TaskManagerStat } from './task_events';
|
||||
|
||||
interface TaskPoolOpts {
|
||||
capacity$: Observable<number>;
|
||||
definitions: TaskTypeDictionary;
|
||||
interface Opts {
|
||||
maxWorkers$: Observable<number>;
|
||||
logger: Logger;
|
||||
strategy: string;
|
||||
}
|
||||
|
||||
export enum TaskPoolRunResult {
|
||||
|
@ -41,43 +34,31 @@ export enum TaskPoolRunResult {
|
|||
}
|
||||
|
||||
const VERSION_CONFLICT_MESSAGE = 'Task has been claimed by another Kibana service';
|
||||
const MAX_RUN_ATTEMPTS = 3;
|
||||
|
||||
/**
|
||||
* Runs tasks in batches, taking costs into account.
|
||||
*/
|
||||
export class TaskPool {
|
||||
private maxWorkers: number = 0;
|
||||
private tasksInPool = new Map<string, TaskRunner>();
|
||||
private logger: Logger;
|
||||
private load$ = new Subject<TaskManagerStat>();
|
||||
private definitions: TaskTypeDictionary;
|
||||
private capacityCalculator: ICapacity;
|
||||
|
||||
/**
|
||||
* Creates an instance of TaskPool.
|
||||
*
|
||||
* @param {Opts} opts
|
||||
* @prop {number} capacity - The total capacity available
|
||||
* (e.g. capacity is 4, then 2 tasks of cost 2 can run at a time, or 4 tasks of cost 1)
|
||||
* @prop {number} maxWorkers - The total number of workers / work slots available
|
||||
* (e.g. maxWorkers is 4, then 2 tasks of cost 2 can run at a time, or 4 tasks of cost 1)
|
||||
* @prop {Logger} logger - The task manager logger.
|
||||
*/
|
||||
constructor(opts: TaskPoolOpts) {
|
||||
constructor(opts: Opts) {
|
||||
this.logger = opts.logger;
|
||||
this.definitions = opts.definitions;
|
||||
|
||||
switch (opts.strategy) {
|
||||
case CLAIM_STRATEGY_MGET:
|
||||
this.capacityCalculator = new CostCapacity({
|
||||
capacity$: opts.capacity$,
|
||||
logger: this.logger,
|
||||
});
|
||||
break;
|
||||
|
||||
default:
|
||||
this.capacityCalculator = new WorkerCapacity({
|
||||
capacity$: opts.capacity$,
|
||||
logger: this.logger,
|
||||
});
|
||||
}
|
||||
opts.maxWorkers$.subscribe((maxWorkers) => {
|
||||
this.logger.debug(`Task pool now using ${maxWorkers} as the max worker value`);
|
||||
this.maxWorkers = maxWorkers;
|
||||
});
|
||||
}
|
||||
|
||||
public get load(): Observable<TaskManagerStat> {
|
||||
|
@ -85,39 +66,38 @@ export class TaskPool {
|
|||
}
|
||||
|
||||
/**
|
||||
* Gets how much capacity is currently in use.
|
||||
* Gets how many workers are currently in use.
|
||||
*/
|
||||
public get usedCapacity() {
|
||||
return this.capacityCalculator.usedCapacity(this.tasksInPool);
|
||||
public get occupiedWorkers() {
|
||||
return this.tasksInPool.size;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets how much capacity is currently in use as a percentage
|
||||
* Gets % of workers in use
|
||||
*/
|
||||
public get usedCapacityPercentage() {
|
||||
return this.capacityCalculator.usedCapacityPercentage(this.tasksInPool);
|
||||
public get workerLoad() {
|
||||
return this.maxWorkers ? Math.round((this.occupiedWorkers * 100) / this.maxWorkers) : 100;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets how much capacity is currently available.
|
||||
* Gets how many workers are currently available.
|
||||
*/
|
||||
public availableCapacity(taskType?: string) {
|
||||
public get availableWorkers() {
|
||||
// cancel expired task whenever a call is made to check for capacity
|
||||
// this ensures that we don't end up with a queue of hung tasks causing both
|
||||
// the poller and the pool from hanging due to lack of capacity
|
||||
this.cancelExpiredTasks();
|
||||
|
||||
return this.capacityCalculator.availableCapacity(
|
||||
this.tasksInPool,
|
||||
taskType ? this.definitions.get(taskType) : null
|
||||
);
|
||||
return this.maxWorkers - this.occupiedWorkers;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets how much capacity is currently in use by each type.
|
||||
* Gets how many workers are currently in use by type.
|
||||
*/
|
||||
public getUsedCapacityByType(type: string) {
|
||||
return this.capacityCalculator.getUsedCapacityByType([...this.tasksInPool.values()], type);
|
||||
public getOccupiedWorkersByType(type: string) {
|
||||
return [...this.tasksInPool.values()].reduce(
|
||||
(count, runningTask) => (runningTask.definition.type === type ? ++count : count),
|
||||
0
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -128,14 +108,26 @@ export class TaskPool {
|
|||
* @param {TaskRunner[]} tasks
|
||||
* @returns {Promise<boolean>}
|
||||
*/
|
||||
public async run(tasks: TaskRunner[]): Promise<TaskPoolRunResult> {
|
||||
// Note `this.availableCapacity` has side effects, so we just want
|
||||
public async run(tasks: TaskRunner[], attempt = 1): Promise<TaskPoolRunResult> {
|
||||
// Note `this.availableWorkers` is a getter with side effects, so we just want
|
||||
// to call it once for this bit of the code.
|
||||
const availableCapacity = this.availableCapacity();
|
||||
const [tasksToRun, leftOverTasks] = this.capacityCalculator.determineTasksToRunBasedOnCapacity(
|
||||
tasks,
|
||||
availableCapacity
|
||||
);
|
||||
const availableWorkers = this.availableWorkers;
|
||||
const [tasksToRun, leftOverTasks] = partitionListByCount(tasks, availableWorkers);
|
||||
|
||||
if (attempt > MAX_RUN_ATTEMPTS) {
|
||||
const stats = [
|
||||
`availableWorkers: ${availableWorkers}`,
|
||||
`tasksToRun: ${tasksToRun.length}`,
|
||||
`leftOverTasks: ${leftOverTasks.length}`,
|
||||
`maxWorkers: ${this.maxWorkers}`,
|
||||
`occupiedWorkers: ${this.occupiedWorkers}`,
|
||||
`workerLoad: ${this.workerLoad}`,
|
||||
].join(', ');
|
||||
this.logger.warn(
|
||||
`task pool run attempts exceeded ${MAX_RUN_ATTEMPTS}; assuming ran out of capacity; ${stats}`
|
||||
);
|
||||
return TaskPoolRunResult.RanOutOfCapacity;
|
||||
}
|
||||
|
||||
if (tasksToRun.length) {
|
||||
await Promise.all(
|
||||
|
@@ -171,10 +163,11 @@
     }

     if (leftOverTasks.length) {
-      // leave any leftover tasks
-      // they will be available for claiming in 30 seconds
+      if (this.availableWorkers) {
+        return this.run(leftOverTasks, attempt + 1);
+      }
       return TaskPoolRunResult.RanOutOfCapacity;
-    } else if (!this.availableCapacity()) {
+    } else if (!this.availableWorkers) {
       return TaskPoolRunResult.RunningAtCapacity;
     }
     return TaskPoolRunResult.RunningAllClaimedTasks;
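Design note on the hunk above: the cost-based pool simply left leftover tasks for the next claim cycle, while the restored worker-based pool re-runs them immediately, bounded by the MAX_RUN_ATTEMPTS constant (3) restored earlier in this file. A compact, hypothetical sketch of that bounded-retry pattern (the names below are illustrative, not the plugin's API):

// Hypothetical sketch of the bounded-retry pattern used by run() above.
const MAX_RUN_ATTEMPTS = 3;

async function runWithRetries(tasks: string[], attempt = 1): Promise<string> {
  if (attempt > MAX_RUN_ATTEMPTS) {
    // Mirrors the warning path in task_pool.ts: give up and report no capacity.
    return 'RanOutOfCapacity';
  }
  const capacity = await currentCapacity(); // stand-in for this.availableWorkers
  const leftOver = tasks.slice(capacity);
  if (leftOver.length > 0 && capacity > 0) {
    return runWithRetries(leftOver, attempt + 1);
  }
  return leftOver.length > 0 ? 'RanOutOfCapacity' : 'RunningAllClaimedTasks';
}

async function currentCapacity(): Promise<number> {
  return 1; // placeholder value for illustration
}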
@@ -249,6 +242,11 @@
   }
 }

+function partitionListByCount<T>(list: T[], count: number): [T[], T[]] {
+  const listInCount = list.splice(0, count);
+  return [listInCount, list];
+}
+
 function durationAsString(duration: Duration): string {
   const [m, s] = [duration.minutes(), duration.seconds()].map((value) =>
     padStart(`${value}`, 2, '0')
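One detail of the restored partitionListByCount helper above worth noting: Array.prototype.splice mutates its argument, so the second element of the returned tuple is the very array the caller passed in, now shortened. A small usage sketch, using the definition shown above:

// splice(0, count) removes the first `count` items in place and returns them.
const tasks = ['a', 'b', 'c', 'd'];
const [toRun, leftOver] = partitionListByCount(tasks, 2);
// toRun    -> ['a', 'b']
// leftOver -> ['c', 'd'] (and `tasks` itself now also holds only ['c', 'd'])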
@@ -1,21 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License
- * 2.0; you may not use this file except in compliance with the Elastic License
- * 2.0.
- */
-const createCapacityMock = () => {
-  return jest.fn().mockImplementation(() => {
-    return {
-      determineTasksToRunBasedOnCapacity: jest.fn(),
-      getUsedCapacityByType: jest.fn(),
-      usedCapacityPercentage: jest.fn(),
-      usedCapacity: jest.fn(),
-      capacity: jest.fn(),
-    };
-  });
-};
-
-export const capacityMock = {
-  create: createCapacityMock(),
-};
@ -1,171 +0,0 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import { loggingSystemMock } from '@kbn/core/server/mocks';
|
||||
import { of, Subject } from 'rxjs';
|
||||
import { TaskCost } from '../task';
|
||||
import { CostCapacity } from './cost_capacity';
|
||||
import { mockTask } from './test_utils';
|
||||
|
||||
const logger = loggingSystemMock.create().get();
|
||||
|
||||
describe('CostCapacity', () => {
|
||||
beforeEach(() => {
|
||||
jest.resetAllMocks();
|
||||
});
|
||||
|
||||
test('capacity responds to changes from capacity$ observable', () => {
|
||||
const capacity$ = new Subject<number>();
|
||||
const pool = new CostCapacity({ capacity$, logger });
|
||||
|
||||
expect(pool.capacity).toBe(0);
|
||||
|
||||
capacity$.next(20);
|
||||
expect(pool.capacity).toBe(40);
|
||||
|
||||
capacity$.next(16);
|
||||
expect(pool.capacity).toBe(32);
|
||||
|
||||
expect(logger.debug).toHaveBeenCalledTimes(2);
|
||||
expect(logger.debug).toHaveBeenNthCalledWith(
|
||||
1,
|
||||
`Task pool now using 40 as the max allowed cost which is based on a capacity of 20`
|
||||
);
|
||||
expect(logger.debug).toHaveBeenNthCalledWith(
|
||||
2,
|
||||
`Task pool now using 32 as the max allowed cost which is based on a capacity of 16`
|
||||
);
|
||||
});
|
||||
|
||||
test('usedCapacity returns the sum of costs of tasks in the pool', () => {
|
||||
const pool = new CostCapacity({ capacity$: of(10), logger });
|
||||
|
||||
const tasksInPool = new Map([
|
||||
['1', { ...mockTask() }],
|
||||
['2', { ...mockTask({}, { cost: TaskCost.Tiny }) }],
|
||||
['3', { ...mockTask() }],
|
||||
]);
|
||||
|
||||
expect(pool.usedCapacity(tasksInPool)).toBe(5);
|
||||
});
|
||||
|
||||
test('usedCapacityPercentage returns the percentage of capacity used based on cost of tasks in the pool', () => {
|
||||
const pool = new CostCapacity({ capacity$: of(10), logger });
|
||||
|
||||
const tasksInPool = new Map([
|
||||
['1', { ...mockTask() }],
|
||||
['2', { ...mockTask({}, { cost: TaskCost.Tiny }) }],
|
||||
['3', { ...mockTask() }],
|
||||
]);
|
||||
|
||||
expect(pool.usedCapacityPercentage(tasksInPool)).toBe(25);
|
||||
});
|
||||
|
||||
test('usedCapacityByType returns the sum of of costs of tasks of specified type in the pool', () => {
|
||||
const pool = new CostCapacity({ capacity$: of(10), logger });
|
||||
|
||||
const tasksInPool = [
|
||||
{ ...mockTask({}, { type: 'type1' }) },
|
||||
{ ...mockTask({}, { type: 'type1', cost: TaskCost.Tiny }) },
|
||||
{ ...mockTask({}, { type: 'type2' }) },
|
||||
];
|
||||
|
||||
expect(pool.getUsedCapacityByType(tasksInPool, 'type1')).toBe(3);
|
||||
expect(pool.getUsedCapacityByType(tasksInPool, 'type2')).toBe(2);
|
||||
expect(pool.getUsedCapacityByType(tasksInPool, 'type3')).toBe(0);
|
||||
});
|
||||
|
||||
test('availableCapacity returns the full available capacity when no task type is defined', () => {
|
||||
const pool = new CostCapacity({ capacity$: of(10), logger });
|
||||
|
||||
const tasksInPool = new Map([
|
||||
['1', { ...mockTask() }],
|
||||
['2', { ...mockTask({}, { cost: TaskCost.Tiny }) }],
|
||||
['3', { ...mockTask() }],
|
||||
]);
|
||||
|
||||
expect(pool.availableCapacity(tasksInPool)).toBe(15);
|
||||
});
|
||||
|
||||
test('availableCapacity returns the full available capacity when task type with no maxConcurrency is provided', () => {
|
||||
const pool = new CostCapacity({ capacity$: of(10), logger });
|
||||
|
||||
const tasksInPool = new Map([
|
||||
['1', { ...mockTask() }],
|
||||
['2', { ...mockTask({}, { cost: TaskCost.Tiny }) }],
|
||||
['3', { ...mockTask() }],
|
||||
]);
|
||||
|
||||
expect(
|
||||
pool.availableCapacity(tasksInPool, {
|
||||
type: 'type1',
|
||||
cost: TaskCost.Normal,
|
||||
createTaskRunner: jest.fn(),
|
||||
timeout: '5m',
|
||||
})
|
||||
).toBe(15);
|
||||
});
|
||||
|
||||
test('availableCapacity returns the available capacity for the task type when task type with maxConcurrency is provided', () => {
|
||||
const pool = new CostCapacity({ capacity$: of(10), logger });
|
||||
|
||||
const tasksInPool = new Map([
|
||||
['1', { ...mockTask({}, { type: 'type1' }) }],
|
||||
['2', { ...mockTask({}, { cost: TaskCost.Tiny }) }],
|
||||
['3', { ...mockTask() }],
|
||||
]);
|
||||
|
||||
expect(
|
||||
pool.availableCapacity(tasksInPool, {
|
||||
type: 'type1',
|
||||
maxConcurrency: 3,
|
||||
cost: TaskCost.Normal,
|
||||
createTaskRunner: jest.fn(),
|
||||
timeout: '5m',
|
||||
})
|
||||
).toBe(4);
|
||||
});
|
||||
|
||||
describe('determineTasksToRunBasedOnCapacity', () => {
|
||||
test('runs all tasks if there is capacity', () => {
|
||||
const pool = new CostCapacity({ capacity$: of(10), logger });
|
||||
const tasks = [{ ...mockTask() }, { ...mockTask() }, { ...mockTask() }];
|
||||
const [tasksToRun, leftoverTasks] = pool.determineTasksToRunBasedOnCapacity(tasks, 20);
|
||||
|
||||
expect(tasksToRun).toEqual(tasks);
|
||||
expect(leftoverTasks).toEqual([]);
|
||||
});
|
||||
|
||||
test('runs task in order until capacity is reached', () => {
|
||||
const pool = new CostCapacity({ capacity$: of(10), logger });
|
||||
const tasks = [
|
||||
{ ...mockTask() },
|
||||
{ ...mockTask() },
|
||||
{ ...mockTask() },
|
||||
{ ...mockTask({}, { cost: TaskCost.ExtraLarge }) },
|
||||
{ ...mockTask({}, { cost: TaskCost.ExtraLarge }) },
|
||||
// technically have capacity for these tasks if we skip the previous task, but we're running
|
||||
// in order to avoid possibly starving large cost tasks
|
||||
{ ...mockTask() },
|
||||
{ ...mockTask() },
|
||||
];
|
||||
const [tasksToRun, leftoverTasks] = pool.determineTasksToRunBasedOnCapacity(tasks, 20);
|
||||
|
||||
expect(tasksToRun).toEqual([tasks[0], tasks[1], tasks[2], tasks[3]]);
|
||||
expect(leftoverTasks).toEqual([tasks[4], tasks[5], tasks[6]]);
|
||||
});
|
||||
|
||||
test('does not run tasks if there is no capacity', () => {
|
||||
const pool = new CostCapacity({ capacity$: of(10), logger });
|
||||
const tasks = [{ ...mockTask() }, { ...mockTask() }, { ...mockTask() }];
|
||||
const [tasksToRun, leftoverTasks] = pool.determineTasksToRunBasedOnCapacity(tasks, 1);
|
||||
|
||||
expect(tasksToRun).toEqual([]);
|
||||
expect(leftoverTasks).toEqual(tasks);
|
||||
});
|
||||
});
|
||||
});
|
|
@ -1,109 +0,0 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import { Logger } from '@kbn/core/server';
|
||||
import { TaskDefinition } from '../task';
|
||||
import { TaskRunner } from '../task_running';
|
||||
import { CapacityOpts, ICapacity } from './types';
|
||||
import { getCapacityInCost } from './utils';
|
||||
|
||||
export class CostCapacity implements ICapacity {
|
||||
private maxAllowedCost: number = 0;
|
||||
private logger: Logger;
|
||||
|
||||
constructor(opts: CapacityOpts) {
|
||||
this.logger = opts.logger;
|
||||
opts.capacity$.subscribe((capacity) => {
|
||||
// Capacity config describes the number of normal-cost tasks that can be
|
||||
// run simulatenously. Multiple by the cost of a normal cost to determine
|
||||
// the maximum allowed cost
|
||||
this.maxAllowedCost = getCapacityInCost(capacity);
|
||||
this.logger.debug(
|
||||
`Task pool now using ${this.maxAllowedCost} as the max allowed cost which is based on a capacity of ${capacity}`
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
public get capacity(): number {
|
||||
return this.maxAllowedCost;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets how much capacity is currently in use.
|
||||
*/
|
||||
public usedCapacity(tasksInPool: Map<string, TaskRunner>) {
|
||||
let result = 0;
|
||||
tasksInPool.forEach((task) => {
|
||||
result += task.definition.cost;
|
||||
});
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets % of capacity in use
|
||||
*/
|
||||
public usedCapacityPercentage(tasksInPool: Map<string, TaskRunner>) {
|
||||
return this.capacity ? Math.round((this.usedCapacity(tasksInPool) * 100) / this.capacity) : 100;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets how much capacity is currently in use by each type.
|
||||
*/
|
||||
public getUsedCapacityByType(tasksInPool: TaskRunner[], type: string) {
|
||||
return tasksInPool.reduce(
|
||||
(count, runningTask) =>
|
||||
runningTask.definition.type === type ? count + runningTask.definition.cost : count,
|
||||
0
|
||||
);
|
||||
}
|
||||
|
||||
public availableCapacity(
|
||||
tasksInPool: Map<string, TaskRunner>,
|
||||
taskDefinition?: TaskDefinition | null
|
||||
): number {
|
||||
const allAvailableCapacity = this.capacity - this.usedCapacity(tasksInPool);
|
||||
if (taskDefinition && taskDefinition.maxConcurrency) {
|
||||
// calculate the max capacity that can be used for this task type based on cost
|
||||
const maxCapacityForType = taskDefinition.maxConcurrency * taskDefinition.cost;
|
||||
return Math.max(
|
||||
Math.min(
|
||||
allAvailableCapacity,
|
||||
maxCapacityForType -
|
||||
this.getUsedCapacityByType([...tasksInPool.values()], taskDefinition.type)
|
||||
),
|
||||
0
|
||||
);
|
||||
}
|
||||
|
||||
return allAvailableCapacity;
|
||||
}
|
||||
|
||||
public determineTasksToRunBasedOnCapacity(
|
||||
tasks: TaskRunner[],
|
||||
availableCapacity: number
|
||||
): [TaskRunner[], TaskRunner[]] {
|
||||
const tasksToRun: TaskRunner[] = [];
|
||||
const leftOverTasks: TaskRunner[] = [];
|
||||
|
||||
let capacityAccumulator = 0;
|
||||
for (const task of tasks) {
|
||||
const taskCost = task.definition.cost;
|
||||
if (capacityAccumulator + taskCost <= availableCapacity) {
|
||||
tasksToRun.push(task);
|
||||
capacityAccumulator += taskCost;
|
||||
} else {
|
||||
leftOverTasks.push(task);
|
||||
// Don't claim further tasks even if lower cost tasks are next.
|
||||
// It may be an extra large task and we need to make room for it
|
||||
// for the next claiming cycle
|
||||
capacityAccumulator = availableCapacity;
|
||||
}
|
||||
}
|
||||
|
||||
return [tasksToRun, leftOverTasks];
|
||||
}
|
||||
}
|
|
@@ -1,9 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License
- * 2.0; you may not use this file except in compliance with the Elastic License
- * 2.0.
- */
-
-export { TaskPool, TaskPoolRunResult } from './task_pool';
-export { getCapacityInCost, getCapacityInWorkers } from './utils';
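The removed index above exported getCapacityInCost and getCapacityInWorkers from utils. Their bodies are not part of this diff, but the CostCapacity tests earlier (capacity 20 reported as max allowed cost 40, capacity 16 as 32) imply that getCapacityInCost multiplies the configured capacity by TaskCost.Normal (2). A hedged sketch under that assumption only:

// Assumed behavior, inferred from the CostCapacity tests shown earlier in this diff.
const NORMAL_COST = 2; // TaskCost.Normal

function getCapacityInCost(capacity: number): number {
  // capacity is "how many normal-cost tasks can run at once";
  // expressed in cost units, that is capacity * NORMAL_COST.
  return capacity * NORMAL_COST;
}

// getCapacityInCost(20) === 40 and getCapacityInCost(16) === 32,
// matching the debug messages asserted in cost_capacity.test.ts above.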
@ -1,867 +0,0 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
import sinon from 'sinon';
|
||||
import { of, Subject } from 'rxjs';
|
||||
import { TaskPool, TaskPoolRunResult } from './task_pool';
|
||||
import { resolvable, sleep } from '../test_utils';
|
||||
import { loggingSystemMock } from '@kbn/core/server/mocks';
|
||||
import { Logger } from '@kbn/core/server';
|
||||
import { asOk } from '../lib/result_type';
|
||||
import { SavedObjectsErrorHelpers } from '@kbn/core/server';
|
||||
import moment from 'moment';
|
||||
import { v4 as uuidv4 } from 'uuid';
|
||||
import { TaskCost } from '../task';
|
||||
import * as CostCapacityModule from './cost_capacity';
|
||||
import * as WorkerCapacityModule from './worker_capacity';
|
||||
import { capacityMock } from './capacity.mock';
|
||||
import { CLAIM_STRATEGY_DEFAULT, CLAIM_STRATEGY_MGET } from '../config';
|
||||
import { mockRun, mockTask } from './test_utils';
|
||||
import { TaskTypeDictionary } from '../task_type_dictionary';
|
||||
|
||||
jest.mock('../constants', () => ({
|
||||
CONCURRENCY_ALLOW_LIST_BY_TASK_TYPE: ['report', 'quickReport'],
|
||||
}));
|
||||
|
||||
describe('TaskPool', () => {
|
||||
const costCapacityMock = capacityMock.create();
|
||||
const workerCapacityMock = capacityMock.create();
|
||||
const logger = loggingSystemMock.create().get();
|
||||
|
||||
const definitions = new TaskTypeDictionary(logger);
|
||||
definitions.registerTaskDefinitions({
|
||||
report: {
|
||||
title: 'report',
|
||||
maxConcurrency: 1,
|
||||
cost: TaskCost.ExtraLarge,
|
||||
createTaskRunner: jest.fn(),
|
||||
},
|
||||
quickReport: {
|
||||
title: 'quickReport',
|
||||
maxConcurrency: 5,
|
||||
createTaskRunner: jest.fn(),
|
||||
},
|
||||
});
|
||||
|
||||
beforeEach(() => {
|
||||
jest.resetAllMocks();
|
||||
jest.useFakeTimers();
|
||||
jest.setSystemTime(new Date(2021, 12, 30));
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
jest.useRealTimers();
|
||||
});
|
||||
|
||||
describe('uses the correct capacity calculator based on the strategy', () => {
|
||||
let costCapacitySpy: jest.SpyInstance;
|
||||
let workerCapacitySpy: jest.SpyInstance;
|
||||
beforeEach(() => {
|
||||
costCapacitySpy = jest
|
||||
.spyOn(CostCapacityModule, 'CostCapacity')
|
||||
.mockImplementation(() => costCapacityMock);
|
||||
|
||||
workerCapacitySpy = jest
|
||||
.spyOn(WorkerCapacityModule, 'WorkerCapacity')
|
||||
.mockImplementation(() => workerCapacityMock);
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
costCapacitySpy.mockRestore();
|
||||
workerCapacitySpy.mockRestore();
|
||||
});
|
||||
|
||||
test('uses CostCapacity to calculate capacity when strategy is mget', () => {
|
||||
new TaskPool({ capacity$: of(20), definitions, logger, strategy: CLAIM_STRATEGY_MGET });
|
||||
|
||||
expect(CostCapacityModule.CostCapacity).toHaveBeenCalledTimes(1);
|
||||
expect(WorkerCapacityModule.WorkerCapacity).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test('uses WorkerCapacity to calculate capacity when strategy is default', () => {
|
||||
new TaskPool({ capacity$: of(20), definitions, logger, strategy: CLAIM_STRATEGY_DEFAULT });
|
||||
|
||||
expect(CostCapacityModule.CostCapacity).not.toHaveBeenCalled();
|
||||
expect(WorkerCapacityModule.WorkerCapacity).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
test('uses WorkerCapacity to calculate capacity when strategy is unrecognized', () => {
|
||||
new TaskPool({ capacity$: of(20), definitions, logger, strategy: 'any old strategy' });
|
||||
|
||||
expect(CostCapacityModule.CostCapacity).not.toHaveBeenCalled();
|
||||
expect(WorkerCapacityModule.WorkerCapacity).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
});
|
||||
|
||||
describe('with CLAIM_STRATEGY_DEFAULT', () => {
|
||||
test('usedCapacity is the number running tasks', async () => {
|
||||
const pool = new TaskPool({
|
||||
capacity$: of(10),
|
||||
definitions,
|
||||
logger,
|
||||
strategy: CLAIM_STRATEGY_DEFAULT,
|
||||
});
|
||||
|
||||
const result = await pool.run([{ ...mockTask() }, { ...mockTask() }, { ...mockTask() }]);
|
||||
|
||||
expect(result).toEqual(TaskPoolRunResult.RunningAllClaimedTasks);
|
||||
expect(pool.usedCapacity).toEqual(3);
|
||||
});
|
||||
|
||||
test('availableCapacity are a function of total_capacity - usedCapacity', async () => {
|
||||
const pool = new TaskPool({
|
||||
capacity$: of(10),
|
||||
definitions,
|
||||
logger,
|
||||
strategy: CLAIM_STRATEGY_DEFAULT,
|
||||
});
|
||||
|
||||
const result = await pool.run([{ ...mockTask() }, { ...mockTask() }, { ...mockTask() }]);
|
||||
|
||||
expect(result).toEqual(TaskPoolRunResult.RunningAllClaimedTasks);
|
||||
expect(pool.availableCapacity()).toEqual(7);
|
||||
});
|
||||
|
||||
test('availableCapacity is 0 until capacity$ pushes a value', async () => {
|
||||
const capacity$ = new Subject<number>();
|
||||
const pool = new TaskPool({
|
||||
capacity$,
|
||||
definitions,
|
||||
logger,
|
||||
strategy: CLAIM_STRATEGY_DEFAULT,
|
||||
});
|
||||
|
||||
expect(pool.availableCapacity()).toEqual(0);
|
||||
capacity$.next(10);
|
||||
expect(pool.availableCapacity()).toEqual(10);
|
||||
});
|
||||
|
||||
test('does not run tasks that are beyond its available capacity', async () => {
|
||||
const pool = new TaskPool({
|
||||
capacity$: of(2),
|
||||
definitions,
|
||||
logger,
|
||||
strategy: CLAIM_STRATEGY_DEFAULT,
|
||||
});
|
||||
|
||||
const shouldRun = mockRun();
|
||||
const shouldNotRun = mockRun();
|
||||
|
||||
const result = await pool.run([
|
||||
{ ...mockTask(), run: shouldRun },
|
||||
{ ...mockTask(), run: shouldRun },
|
||||
{ ...mockTask(), run: shouldNotRun },
|
||||
]);
|
||||
|
||||
expect(result).toEqual(TaskPoolRunResult.RanOutOfCapacity);
|
||||
expect(pool.availableCapacity()).toEqual(0);
|
||||
expect(shouldRun).toHaveBeenCalledTimes(2);
|
||||
expect(shouldNotRun).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test('should log when marking a Task as running fails', async () => {
|
||||
const pool = new TaskPool({
|
||||
capacity$: of(3),
|
||||
definitions,
|
||||
logger,
|
||||
strategy: CLAIM_STRATEGY_DEFAULT,
|
||||
});
|
||||
|
||||
const taskFailedToMarkAsRunning = mockTask();
|
||||
taskFailedToMarkAsRunning.markTaskAsRunning.mockImplementation(async () => {
|
||||
throw new Error(`Mark Task as running has failed miserably`);
|
||||
});
|
||||
|
||||
const result = await pool.run([mockTask(), taskFailedToMarkAsRunning, mockTask()]);
|
||||
|
||||
expect((logger as jest.Mocked<Logger>).error.mock.calls[0]).toMatchInlineSnapshot(`
|
||||
Array [
|
||||
"Failed to mark Task TaskType \\"shooooo\\" as running: Mark Task as running has failed miserably",
|
||||
]
|
||||
`);
|
||||
|
||||
expect(result).toEqual(TaskPoolRunResult.RunningAllClaimedTasks);
|
||||
});
|
||||
|
||||
test('should log when running a Task fails', async () => {
|
||||
const pool = new TaskPool({
|
||||
capacity$: of(3),
|
||||
definitions,
|
||||
logger,
|
||||
strategy: CLAIM_STRATEGY_DEFAULT,
|
||||
});
|
||||
|
||||
const taskFailedToRun = mockTask();
|
||||
taskFailedToRun.run.mockImplementation(async () => {
|
||||
throw new Error(`Run Task has failed miserably`);
|
||||
});
|
||||
|
||||
const result = await pool.run([mockTask(), taskFailedToRun, mockTask()]);
|
||||
|
||||
expect((logger as jest.Mocked<Logger>).warn.mock.calls[0]).toMatchInlineSnapshot(`
|
||||
Array [
|
||||
"Task TaskType \\"shooooo\\" failed in attempt to run: Run Task has failed miserably",
|
||||
]
|
||||
`);
|
||||
|
||||
expect(result).toEqual(TaskPoolRunResult.RunningAllClaimedTasks);
|
||||
});
|
||||
|
||||
test('should not log when running a Task fails due to the Task SO having been deleted while in flight', async () => {
|
||||
const pool = new TaskPool({
|
||||
capacity$: of(3),
|
||||
definitions,
|
||||
logger,
|
||||
strategy: CLAIM_STRATEGY_DEFAULT,
|
||||
});
|
||||
|
||||
const taskFailedToRun = mockTask();
|
||||
taskFailedToRun.run.mockImplementation(async () => {
|
||||
throw SavedObjectsErrorHelpers.createGenericNotFoundError('task', taskFailedToRun.id);
|
||||
});
|
||||
|
||||
const result = await pool.run([mockTask(), taskFailedToRun, mockTask()]);
|
||||
|
||||
expect(logger.debug).toHaveBeenCalledWith(
|
||||
`Task TaskType "shooooo" failed in attempt to run: Saved object [task/${taskFailedToRun.id}] not found`
|
||||
);
|
||||
expect(logger.warn).not.toHaveBeenCalled();
|
||||
|
||||
expect(result).toEqual(TaskPoolRunResult.RunningAllClaimedTasks);
|
||||
});
|
||||
|
||||
test('Running a task which fails still takes up capacity', async () => {
|
||||
const pool = new TaskPool({
|
||||
capacity$: of(1),
|
||||
definitions,
|
||||
logger,
|
||||
strategy: CLAIM_STRATEGY_DEFAULT,
|
||||
});
|
||||
|
||||
const taskFailedToRun = mockTask();
|
||||
taskFailedToRun.run.mockImplementation(async () => {
|
||||
await sleep(0);
|
||||
throw new Error(`Run Task has failed miserably`);
|
||||
});
|
||||
|
||||
const result = await pool.run([taskFailedToRun, mockTask()]);
|
||||
|
||||
expect(result).toEqual(TaskPoolRunResult.RanOutOfCapacity);
|
||||
});
|
||||
|
||||
test('clears up capacity when a task completes', async () => {
|
||||
const pool = new TaskPool({
|
||||
capacity$: of(1),
|
||||
definitions,
|
||||
logger,
|
||||
strategy: CLAIM_STRATEGY_DEFAULT,
|
||||
});
|
||||
|
||||
const firstWork = resolvable();
|
||||
const firstRun = sinon.spy(async () => {
|
||||
await sleep(0);
|
||||
firstWork.resolve();
|
||||
return asOk({ state: {} });
|
||||
});
|
||||
const secondWork = resolvable();
|
||||
const secondRun = sinon.spy(async () => {
|
||||
await sleep(0);
|
||||
secondWork.resolve();
|
||||
return asOk({ state: {} });
|
||||
});
|
||||
|
||||
const result = await pool.run([
|
||||
{ ...mockTask(), run: firstRun },
|
||||
{ ...mockTask(), run: secondRun },
|
||||
]);
|
||||
|
||||
expect(result).toEqual(TaskPoolRunResult.RanOutOfCapacity);
|
||||
expect(pool.usedCapacity).toEqual(1);
|
||||
expect(pool.availableCapacity()).toEqual(0);
|
||||
|
||||
await firstWork;
|
||||
sinon.assert.calledOnce(firstRun);
|
||||
sinon.assert.notCalled(secondRun);
|
||||
|
||||
expect(pool.usedCapacity).toEqual(0);
|
||||
await pool.run([{ ...mockTask(), run: secondRun }]);
|
||||
expect(pool.usedCapacity).toEqual(1);
|
||||
|
||||
expect(pool.availableCapacity()).toEqual(0);
|
||||
|
||||
await secondWork;
|
||||
|
||||
expect(pool.usedCapacity).toEqual(0);
|
||||
expect(pool.availableCapacity()).toEqual(1);
|
||||
sinon.assert.calledOnce(secondRun);
|
||||
});
|
||||
|
||||
test('run cancels expired tasks prior to running new tasks', async () => {
|
||||
const pool = new TaskPool({
|
||||
capacity$: of(2),
|
||||
definitions,
|
||||
logger,
|
||||
strategy: CLAIM_STRATEGY_DEFAULT,
|
||||
});
|
||||
|
||||
const haltUntilWeAfterFirstRun = resolvable();
|
||||
const taskHasExpired = resolvable();
|
||||
const haltTaskSoThatItCanBeCanceled = resolvable();
|
||||
|
||||
const shouldRun = sinon.spy(() => Promise.resolve());
|
||||
const shouldNotRun = sinon.spy(() => Promise.resolve());
|
||||
const now = new Date();
|
||||
const result = await pool.run([
|
||||
{
|
||||
...mockTask({ id: '1' }),
|
||||
async run() {
|
||||
await haltUntilWeAfterFirstRun;
|
||||
this.isExpired = true;
|
||||
taskHasExpired.resolve();
|
||||
await haltTaskSoThatItCanBeCanceled;
|
||||
return asOk({ state: {} });
|
||||
},
|
||||
get expiration() {
|
||||
return now;
|
||||
},
|
||||
get startedAt() {
|
||||
// 5 and a half minutes
|
||||
return moment(now).subtract(5, 'm').subtract(30, 's').toDate();
|
||||
},
|
||||
cancel: shouldRun,
|
||||
},
|
||||
{
|
||||
...mockTask({ id: '2' }),
|
||||
async run() {
|
||||
// halt here so that we can verify that this task is counted in `occupiedWorkers`
|
||||
await haltUntilWeAfterFirstRun;
|
||||
return asOk({ state: {} });
|
||||
},
|
||||
cancel: shouldNotRun,
|
||||
},
|
||||
]);
|
||||
|
||||
expect(result).toEqual(TaskPoolRunResult.RunningAtCapacity);
|
||||
expect(pool.usedCapacity).toEqual(2);
|
||||
expect(pool.availableCapacity()).toEqual(0);
|
||||
|
||||
// release first stage in task so that it has time to expire, but not complete
|
||||
haltUntilWeAfterFirstRun.resolve();
|
||||
await taskHasExpired;
|
||||
|
||||
expect(await pool.run([{ ...mockTask({ id: '3' }) }])).toBeTruthy();
|
||||
|
||||
sinon.assert.calledOnce(shouldRun);
|
||||
sinon.assert.notCalled(shouldNotRun);
|
||||
|
||||
expect(pool.usedCapacity).toEqual(1);
|
||||
expect(pool.availableCapacity()).toEqual(1);
|
||||
|
||||
haltTaskSoThatItCanBeCanceled.resolve();
|
||||
|
||||
expect(logger.warn).toHaveBeenCalledWith(
|
||||
`Cancelling task TaskType "shooooo" as it expired at ${now.toISOString()} after running for 05m 30s (with timeout set at 5m).`
|
||||
);
|
||||
});
|
||||
|
||||
test('calls to availableWorkers ensures we cancel expired tasks', async () => {
|
||||
const pool = new TaskPool({
|
||||
capacity$: of(1),
|
||||
definitions,
|
||||
logger,
|
||||
strategy: CLAIM_STRATEGY_DEFAULT,
|
||||
});
|
||||
|
||||
const taskIsRunning = resolvable();
|
||||
const taskHasExpired = resolvable();
|
||||
const cancel = sinon.spy(() => Promise.resolve());
|
||||
const now = new Date();
|
||||
expect(
|
||||
await pool.run([
|
||||
{
|
||||
...mockTask(),
|
||||
async run() {
|
||||
await sleep(10);
|
||||
this.isExpired = true;
|
||||
taskIsRunning.resolve();
|
||||
await taskHasExpired;
|
||||
return asOk({ state: {} });
|
||||
},
|
||||
get expiration() {
|
||||
return new Date(now.getTime() + 10);
|
||||
},
|
||||
get startedAt() {
|
||||
return now;
|
||||
},
|
||||
cancel,
|
||||
},
|
||||
])
|
||||
).toEqual(TaskPoolRunResult.RunningAtCapacity);
|
||||
|
||||
await taskIsRunning;
|
||||
|
||||
sinon.assert.notCalled(cancel);
|
||||
expect(pool.usedCapacity).toEqual(1);
|
||||
// The call to `availableCapacity` will clear the expired task so it's 1 instead of 0
|
||||
expect(pool.availableCapacity()).toEqual(1);
|
||||
sinon.assert.calledOnce(cancel);
|
||||
|
||||
expect(pool.usedCapacity).toEqual(0);
|
||||
expect(pool.availableCapacity()).toEqual(1);
|
||||
// ensure cancel isn't called twice
|
||||
sinon.assert.calledOnce(cancel);
|
||||
taskHasExpired.resolve();
|
||||
});
|
||||
|
||||
test('logs if cancellation errors', async () => {
|
||||
const pool = new TaskPool({
|
||||
capacity$: of(10),
|
||||
definitions,
|
||||
logger,
|
||||
strategy: CLAIM_STRATEGY_DEFAULT,
|
||||
});
|
||||
|
||||
const cancelled = resolvable();
|
||||
const result = await pool.run([
|
||||
{
|
||||
...mockTask(),
|
||||
async run() {
|
||||
this.isExpired = true;
|
||||
await sleep(10);
|
||||
return asOk({ state: {} });
|
||||
},
|
||||
async cancel() {
|
||||
cancelled.resolve();
|
||||
throw new Error('Dern!');
|
||||
},
|
||||
toString: () => '"shooooo!"',
|
||||
},
|
||||
]);
|
||||
|
||||
expect(result).toEqual(TaskPoolRunResult.RunningAllClaimedTasks);
|
||||
await pool.run([]);
|
||||
|
||||
expect(pool.usedCapacity).toEqual(0);
|
||||
|
||||
// Allow the task to cancel...
|
||||
await cancelled;
|
||||
|
||||
expect((logger as jest.Mocked<Logger>).error.mock.calls[0][0]).toMatchInlineSnapshot(
|
||||
`"Failed to cancel task \\"shooooo!\\": Error: Dern!"`
|
||||
);
|
||||
});
|
||||
|
||||
test('only allows one task with the same id in the task pool', async () => {
|
||||
const pool = new TaskPool({
|
||||
capacity$: of(2),
|
||||
definitions,
|
||||
logger,
|
||||
strategy: CLAIM_STRATEGY_DEFAULT,
|
||||
});
|
||||
|
||||
const shouldRun = mockRun();
|
||||
const shouldNotRun = mockRun();
|
||||
|
||||
const taskId = uuidv4();
|
||||
const task1 = mockTask({ id: taskId, run: shouldRun });
|
||||
const task2 = mockTask({
|
||||
id: taskId,
|
||||
run: shouldNotRun,
|
||||
isSameTask() {
|
||||
return true;
|
||||
},
|
||||
});
|
||||
|
||||
await pool.run([task1]);
|
||||
await pool.run([task2]);
|
||||
|
||||
expect(shouldRun).toHaveBeenCalledTimes(1);
|
||||
expect(shouldNotRun).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
describe('with CLAIM_STRATEGY_MGET', () => {
|
||||
test('usedCapacity is the sum of the cost of running tasks', async () => {
|
||||
const pool = new TaskPool({
|
||||
capacity$: of(10),
|
||||
definitions,
|
||||
logger,
|
||||
strategy: CLAIM_STRATEGY_MGET,
|
||||
});
|
||||
|
||||
const result = await pool.run([{ ...mockTask() }, { ...mockTask() }, { ...mockTask() }]);
|
||||
|
||||
expect(result).toEqual(TaskPoolRunResult.RunningAllClaimedTasks);
|
||||
expect(pool.usedCapacity).toEqual(3 * TaskCost.Normal);
|
||||
});
|
||||
|
||||
test('availableCapacity are a function of total_capacity - usedCapacity', async () => {
|
||||
const pool = new TaskPool({
|
||||
capacity$: of(10),
|
||||
definitions,
|
||||
logger,
|
||||
strategy: CLAIM_STRATEGY_MGET,
|
||||
});
|
||||
|
||||
const result = await pool.run([{ ...mockTask() }, { ...mockTask() }, { ...mockTask() }]);
|
||||
|
||||
expect(result).toEqual(TaskPoolRunResult.RunningAllClaimedTasks);
|
||||
expect(pool.availableCapacity()).toEqual(14);
|
||||
});
|
||||
|
||||
test('availableCapacity is 0 until capacity$ pushes a value', async () => {
|
||||
const capacity$ = new Subject<number>();
|
||||
const pool = new TaskPool({ capacity$, definitions, logger, strategy: CLAIM_STRATEGY_MGET });
|
||||
|
||||
expect(pool.availableCapacity()).toEqual(0);
|
||||
capacity$.next(20);
|
||||
expect(pool.availableCapacity()).toEqual(40);
|
||||
});
|
||||
|
||||
test('does not run tasks that are beyond its available capacity', async () => {
|
||||
const pool = new TaskPool({
|
||||
capacity$: of(2),
|
||||
definitions,
|
||||
logger,
|
||||
strategy: CLAIM_STRATEGY_MGET,
|
||||
});
|
||||
|
||||
const shouldRun = mockRun();
|
||||
const shouldNotRun = mockRun();
|
||||
|
||||
const result = await pool.run([
|
||||
{ ...mockTask(), run: shouldRun },
|
||||
{ ...mockTask(), run: shouldRun },
|
||||
{ ...mockTask(), run: shouldNotRun },
|
||||
]);
|
||||
|
||||
expect(result).toEqual(TaskPoolRunResult.RanOutOfCapacity);
|
||||
expect(pool.availableCapacity()).toEqual(0);
|
||||
expect(shouldRun).toHaveBeenCalledTimes(2);
|
||||
expect(shouldNotRun).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test('should log when marking a Task as running fails', async () => {
|
||||
const pool = new TaskPool({
|
||||
capacity$: of(6),
|
||||
definitions,
|
||||
logger,
|
||||
strategy: CLAIM_STRATEGY_MGET,
|
||||
});
|
||||
|
||||
const taskFailedToMarkAsRunning = mockTask();
|
||||
taskFailedToMarkAsRunning.markTaskAsRunning.mockImplementation(async () => {
|
||||
throw new Error(`Mark Task as running has failed miserably`);
|
||||
});
|
||||
|
||||
const result = await pool.run([mockTask(), taskFailedToMarkAsRunning, mockTask()]);
|
||||
|
||||
expect((logger as jest.Mocked<Logger>).error.mock.calls[0]).toMatchInlineSnapshot(`
|
||||
Array [
|
||||
"Failed to mark Task TaskType \\"shooooo\\" as running: Mark Task as running has failed miserably",
|
||||
]
|
||||
`);
|
||||
|
||||
expect(result).toEqual(TaskPoolRunResult.RunningAllClaimedTasks);
|
||||
});
|
||||
|
||||
test('should log when running a Task fails', async () => {
|
||||
const pool = new TaskPool({
|
||||
capacity$: of(3),
|
||||
definitions,
|
||||
logger,
|
||||
strategy: CLAIM_STRATEGY_MGET,
|
||||
});
|
||||
|
||||
const taskFailedToRun = mockTask();
|
||||
taskFailedToRun.run.mockImplementation(async () => {
|
||||
throw new Error(`Run Task has failed miserably`);
|
||||
});
|
||||
|
||||
const result = await pool.run([mockTask(), taskFailedToRun, mockTask()]);
|
||||
|
||||
expect((logger as jest.Mocked<Logger>).warn.mock.calls[0]).toMatchInlineSnapshot(`
|
||||
Array [
|
||||
"Task TaskType \\"shooooo\\" failed in attempt to run: Run Task has failed miserably",
|
||||
]
|
||||
`);
|
||||
|
||||
expect(result).toEqual(TaskPoolRunResult.RunningAllClaimedTasks);
|
||||
});
|
||||
|
||||
test('should not log when running a Task fails due to the Task SO having been deleted while in flight', async () => {
|
||||
const pool = new TaskPool({
|
||||
capacity$: of(3),
|
||||
definitions,
|
||||
logger,
|
||||
strategy: CLAIM_STRATEGY_MGET,
|
||||
});
|
||||
|
||||
const taskFailedToRun = mockTask();
|
||||
taskFailedToRun.run.mockImplementation(async () => {
|
||||
throw SavedObjectsErrorHelpers.createGenericNotFoundError('task', taskFailedToRun.id);
|
||||
});
|
||||
|
||||
const result = await pool.run([mockTask(), taskFailedToRun, mockTask()]);
|
||||
|
||||
expect(logger.debug).toHaveBeenCalledWith(
|
||||
`Task TaskType "shooooo" failed in attempt to run: Saved object [task/${taskFailedToRun.id}] not found`
|
||||
);
|
||||
expect(logger.warn).not.toHaveBeenCalled();
|
||||
|
||||
expect(result).toEqual(TaskPoolRunResult.RunningAllClaimedTasks);
|
||||
});
|
||||
|
||||
test('Running a task which fails still takes up capacity', async () => {
|
||||
const pool = new TaskPool({
|
||||
capacity$: of(1),
|
||||
definitions,
|
||||
logger,
|
||||
strategy: CLAIM_STRATEGY_MGET,
|
||||
});
|
||||
|
||||
const taskFailedToRun = mockTask();
|
||||
taskFailedToRun.run.mockImplementation(async () => {
|
||||
await sleep(0);
|
||||
throw new Error(`Run Task has failed miserably`);
|
||||
});
|
||||
|
||||
const result = await pool.run([taskFailedToRun, mockTask()]);
|
||||
|
||||
expect(result).toEqual(TaskPoolRunResult.RanOutOfCapacity);
|
||||
});
|
||||
|
||||
test('clears up capacity when a task completes', async () => {
|
||||
const pool = new TaskPool({
|
||||
capacity$: of(1),
|
||||
definitions,
|
||||
logger,
|
||||
strategy: CLAIM_STRATEGY_MGET,
|
||||
});
|
||||
|
||||
const firstWork = resolvable();
|
||||
const firstRun = sinon.spy(async () => {
|
||||
await sleep(0);
|
||||
firstWork.resolve();
|
||||
return asOk({ state: {} });
|
||||
});
|
||||
const secondWork = resolvable();
|
||||
const secondRun = sinon.spy(async () => {
|
||||
await sleep(0);
|
||||
secondWork.resolve();
|
||||
return asOk({ state: {} });
|
||||
});
|
||||
|
||||
const result = await pool.run([
|
||||
{ ...mockTask(), run: firstRun },
|
||||
{ ...mockTask(), run: secondRun },
|
||||
]);
|
||||
|
||||
expect(result).toEqual(TaskPoolRunResult.RanOutOfCapacity);
|
||||
expect(pool.usedCapacity).toEqual(2);
|
||||
expect(pool.availableCapacity()).toEqual(0);
|
||||
|
||||
await firstWork;
|
||||
sinon.assert.calledOnce(firstRun);
|
||||
sinon.assert.notCalled(secondRun);
|
||||
|
||||
expect(pool.usedCapacity).toEqual(0);
|
||||
await pool.run([{ ...mockTask(), run: secondRun }]);
|
||||
expect(pool.usedCapacity).toEqual(2);
|
||||
|
||||
expect(pool.availableCapacity()).toEqual(0);
|
||||
|
||||
await secondWork;
|
||||
|
||||
expect(pool.usedCapacity).toEqual(0);
|
||||
expect(pool.availableCapacity()).toEqual(2);
|
||||
sinon.assert.calledOnce(secondRun);
|
||||
});
|
||||
|
||||
test('run cancels expired tasks prior to running new tasks', async () => {
|
||||
const pool = new TaskPool({
|
||||
capacity$: of(2),
|
||||
definitions,
|
||||
logger,
|
||||
strategy: CLAIM_STRATEGY_MGET,
|
||||
});
|
||||
|
||||
const haltUntilWeAfterFirstRun = resolvable();
|
||||
const taskHasExpired = resolvable();
|
||||
const haltTaskSoThatItCanBeCanceled = resolvable();
|
||||
|
||||
const shouldRun = sinon.spy(() => Promise.resolve());
|
||||
const shouldNotRun = sinon.spy(() => Promise.resolve());
|
||||
const now = new Date();
|
||||
const result = await pool.run([
|
||||
{
|
||||
...mockTask({ id: '1' }),
|
||||
async run() {
|
||||
await haltUntilWeAfterFirstRun;
|
||||
this.isExpired = true;
|
||||
taskHasExpired.resolve();
|
||||
await haltTaskSoThatItCanBeCanceled;
|
||||
return asOk({ state: {} });
|
||||
},
|
||||
get expiration() {
|
||||
return now;
|
||||
},
|
||||
get startedAt() {
|
||||
// 5 and a half minutes
|
||||
return moment(now).subtract(5, 'm').subtract(30, 's').toDate();
|
||||
},
|
||||
cancel: shouldRun,
|
||||
},
|
||||
{
|
||||
...mockTask({ id: '2' }),
|
||||
async run() {
|
||||
// halt here so that we can verify that this task is counted in `occupiedWorkers`
|
||||
await haltUntilWeAfterFirstRun;
|
||||
return asOk({ state: {} });
|
||||
},
|
||||
cancel: shouldNotRun,
|
||||
},
|
||||
]);
|
||||
|
||||
expect(result).toEqual(TaskPoolRunResult.RunningAtCapacity);
|
||||
expect(pool.usedCapacity).toEqual(4);
|
||||
expect(pool.availableCapacity()).toEqual(0);
|
||||
|
||||
// release first stage in task so that it has time to expire, but not complete
|
||||
haltUntilWeAfterFirstRun.resolve();
|
||||
await taskHasExpired;
|
||||
|
||||
expect(await pool.run([{ ...mockTask({ id: '3' }) }])).toBeTruthy();
|
||||
|
||||
sinon.assert.calledOnce(shouldRun);
|
||||
sinon.assert.notCalled(shouldNotRun);
|
||||
|
||||
expect(pool.usedCapacity).toEqual(2);
|
||||
expect(pool.availableCapacity()).toEqual(2);
|
||||
|
||||
haltTaskSoThatItCanBeCanceled.resolve();
|
||||
|
||||
expect(logger.warn).toHaveBeenCalledWith(
|
||||
`Cancelling task TaskType "shooooo" as it expired at ${now.toISOString()} after running for 05m 30s (with timeout set at 5m).`
|
||||
);
|
||||
});
|
||||
|
||||
test('calls to availableWorkers ensures we cancel expired tasks', async () => {
|
||||
const pool = new TaskPool({
|
||||
capacity$: of(1),
|
||||
definitions,
|
||||
logger,
|
||||
strategy: CLAIM_STRATEGY_MGET,
|
||||
});
|
||||
|
||||
const taskIsRunning = resolvable();
|
||||
const taskHasExpired = resolvable();
|
||||
const cancel = sinon.spy(() => Promise.resolve());
|
||||
const now = new Date();
|
||||
expect(
|
||||
await pool.run([
|
||||
{
|
||||
...mockTask(),
|
||||
async run() {
|
||||
await sleep(10);
|
||||
this.isExpired = true;
|
||||
taskIsRunning.resolve();
|
||||
await taskHasExpired;
|
||||
return asOk({ state: {} });
|
||||
},
|
||||
get expiration() {
|
||||
return new Date(now.getTime() + 10);
|
||||
},
|
||||
get startedAt() {
|
||||
return now;
|
||||
},
|
||||
cancel,
|
||||
},
|
||||
])
|
||||
).toEqual(TaskPoolRunResult.RunningAtCapacity);
|
||||
|
||||
await taskIsRunning;
|
||||
|
||||
sinon.assert.notCalled(cancel);
|
||||
expect(pool.usedCapacity).toEqual(2);
|
||||
// The call to `availableCapacity` will clear the expired task so it's 2 instead of 0
|
||||
expect(pool.availableCapacity()).toEqual(2);
|
||||
sinon.assert.calledOnce(cancel);
|
||||
|
||||
expect(pool.usedCapacity).toEqual(0);
|
||||
expect(pool.availableCapacity()).toEqual(2);
|
||||
// ensure cancel isn't called twice
|
||||
sinon.assert.calledOnce(cancel);
|
||||
taskHasExpired.resolve();
|
||||
});
|
||||
|
||||
test('logs if cancellation errors', async () => {
|
||||
const pool = new TaskPool({
|
||||
capacity$: of(10),
|
||||
definitions,
|
||||
logger,
|
||||
strategy: CLAIM_STRATEGY_MGET,
|
||||
});
|
||||
|
||||
const cancelled = resolvable();
|
||||
const result = await pool.run([
|
||||
{
|
||||
...mockTask(),
|
||||
async run() {
|
||||
this.isExpired = true;
|
||||
await sleep(10);
|
||||
return asOk({ state: {} });
|
||||
},
|
||||
async cancel() {
|
||||
cancelled.resolve();
|
||||
throw new Error('Dern!');
|
||||
},
|
||||
toString: () => '"shooooo!"',
|
||||
},
|
||||
]);
|
||||
|
||||
expect(result).toEqual(TaskPoolRunResult.RunningAllClaimedTasks);
|
||||
await pool.run([]);
|
||||
|
||||
expect(pool.usedCapacity).toEqual(0);
|
||||
|
||||
// Allow the task to cancel...
|
||||
await cancelled;
|
||||
|
||||
expect((logger as jest.Mocked<Logger>).error.mock.calls[0][0]).toMatchInlineSnapshot(
|
||||
`"Failed to cancel task \\"shooooo!\\": Error: Dern!"`
|
||||
);
|
||||
});
|
||||
|
||||
test('only allows one task with the same id in the task pool', async () => {
|
||||
const pool = new TaskPool({
|
||||
capacity$: of(2),
|
||||
definitions,
|
||||
logger,
|
||||
strategy: CLAIM_STRATEGY_MGET,
|
||||
});
|
||||
|
||||
const shouldRun = mockRun();
|
||||
const shouldNotRun = mockRun();
|
||||
|
||||
const taskId = uuidv4();
|
||||
const task1 = mockTask({ id: taskId, run: shouldRun });
|
||||
const task2 = mockTask({
|
||||
id: taskId,
|
||||
run: shouldNotRun,
|
||||
isSameTask() {
|
||||
return true;
|
||||
},
|
||||
});
|
||||
|
||||
await pool.run([task1]);
|
||||
await pool.run([task2]);
|
||||
|
||||
expect(shouldRun).toHaveBeenCalledTimes(1);
|
||||
expect(shouldNotRun).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
});
|
|
@@ -1,53 +0,0 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */
import { v4 as uuidv4 } from 'uuid';
import { asOk } from '../lib/result_type';
import { sleep } from '../test_utils';
import { TaskRunningStage } from '../task_running';
import { TaskCost } from '../task';

export function mockRun() {
  return jest.fn(async () => {
    await sleep(0);
    return asOk({ state: {} });
  });
}

export function mockTask(overrides = {}, definitionOverrides = {}) {
  return {
    isExpired: false,
    taskExecutionId: uuidv4(),
    id: uuidv4(),
    cancel: async () => undefined,
    markTaskAsRunning: jest.fn(async () => true),
    run: mockRun(),
    stage: TaskRunningStage.PENDING,
    toString: () => `TaskType "shooooo"`,
    isAdHocTaskAndOutOfAttempts: false,
    removeTask: jest.fn(),
    get expiration() {
      return new Date();
    },
    get startedAt() {
      return new Date();
    },
    get definition() {
      return {
        type: '',
        title: '',
        timeout: '5m',
        cost: TaskCost.Normal,
        createTaskRunner: jest.fn(),
        ...definitionOverrides,
      };
    },
    isSameTask() {
      return false;
    },
    ...overrides,
  };
}
@@ -1,31 +0,0 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { Observable } from 'rxjs';
import { Logger } from '@kbn/core/server';
import { TaskRunner } from '../task_running';
import { TaskDefinition } from '../task';

export interface ICapacity {
  get capacity(): number;
  availableCapacity(
    tasksInPool: Map<string, TaskRunner>,
    taskDefinition?: TaskDefinition | null
  ): number;
  usedCapacity(tasksInPool: Map<string, TaskRunner>): number;
  usedCapacityPercentage(tasksInPool: Map<string, TaskRunner>): number;
  getUsedCapacityByType(tasksInPool: TaskRunner[], type: string): number;
  determineTasksToRunBasedOnCapacity(
    tasks: TaskRunner[],
    availableCapacity: number
  ): [TaskRunner[], TaskRunner[]];
}

export interface CapacityOpts {
  capacity$: Observable<number>;
  logger: Logger;
}
@@ -1,16 +0,0 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { TaskCost } from '../task';

// When configured capacity is the number of normal cost tasks that this Kibana
// can run, the total available workers equals the capacity
export const getCapacityInWorkers = (capacity: number) => capacity;

// When configured capacity is the number of normal cost tasks that this Kibana
// can run, the total available cost equals the capacity multiplied by the cost of a normal task
export const getCapacityInCost = (capacity: number) => capacity * TaskCost.Normal;
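As a quick illustration of the two helpers above (assuming TaskCost.Normal is 2, which matches the cost values that appear in the snapshots later in this diff), a configured capacity of 10 corresponds to 10 workers under the worker-based strategy and to 20 cost units under the cost-based one:

const configuredCapacity = 10;
const asWorkers = getCapacityInWorkers(configuredCapacity); // 10 – worker-based (default) claim strategy
const asCost = getCapacityInCost(configuredCapacity); // 20 – cost-based (mget) claim strategy, i.e. 10 * TaskCost.Normal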
@ -1,176 +0,0 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import { loggingSystemMock } from '@kbn/core/server/mocks';
|
||||
import { of, Subject } from 'rxjs';
|
||||
import { TaskCost } from '../task';
|
||||
import { mockTask } from './test_utils';
|
||||
import { WorkerCapacity } from './worker_capacity';
|
||||
|
||||
const logger = loggingSystemMock.create().get();
|
||||
|
||||
describe('WorkerCapacity', () => {
|
||||
beforeEach(() => {
|
||||
jest.resetAllMocks();
|
||||
});
|
||||
|
||||
test('workers set based on capacity responds to changes from capacity$ observable', () => {
|
||||
const capacity$ = new Subject<number>();
|
||||
const pool = new WorkerCapacity({ capacity$, logger });
|
||||
|
||||
expect(pool.capacity).toBe(0);
|
||||
|
||||
capacity$.next(20);
|
||||
expect(pool.capacity).toBe(20);
|
||||
|
||||
capacity$.next(16);
|
||||
expect(pool.capacity).toBe(16);
|
||||
|
||||
capacity$.next(25);
|
||||
expect(pool.capacity).toBe(25);
|
||||
|
||||
expect(logger.debug).toHaveBeenCalledTimes(3);
|
||||
expect(logger.debug).toHaveBeenNthCalledWith(
|
||||
1,
|
||||
'Task pool now using 20 as the max worker value which is based on a capacity of 20'
|
||||
);
|
||||
expect(logger.debug).toHaveBeenNthCalledWith(
|
||||
2,
|
||||
'Task pool now using 16 as the max worker value which is based on a capacity of 16'
|
||||
);
|
||||
expect(logger.debug).toHaveBeenNthCalledWith(
|
||||
3,
|
||||
'Task pool now using 25 as the max worker value which is based on a capacity of 25'
|
||||
);
|
||||
});
|
||||
|
||||
test('usedCapacity returns the number of tasks in the pool', () => {
|
||||
const pool = new WorkerCapacity({ capacity$: of(10), logger });
|
||||
|
||||
const tasksInPool = new Map([
|
||||
['1', { ...mockTask() }],
|
||||
['2', { ...mockTask({}, { cost: TaskCost.Tiny }) }],
|
||||
['3', { ...mockTask() }],
|
||||
]);
|
||||
|
||||
expect(pool.usedCapacity(tasksInPool)).toBe(3);
|
||||
});
|
||||
|
||||
test('usedCapacityPercentage returns the percentage of workers in use by tasks in the pool', () => {
|
||||
const pool = new WorkerCapacity({ capacity$: of(10), logger });
|
||||
|
||||
const tasksInPool = new Map([
|
||||
['1', { ...mockTask() }],
|
||||
['2', { ...mockTask({}, { cost: TaskCost.Tiny }) }],
|
||||
['3', { ...mockTask() }],
|
||||
]);
|
||||
|
||||
expect(pool.usedCapacityPercentage(tasksInPool)).toBe(30);
|
||||
});
|
||||
|
||||
test('usedCapacityByType returns the number of tasks of specified type in the pool', () => {
|
||||
const pool = new WorkerCapacity({ capacity$: of(10), logger });
|
||||
|
||||
const tasksInPool = [
|
||||
{ ...mockTask({}, { type: 'type1' }) },
|
||||
{ ...mockTask({}, { type: 'type1', cost: TaskCost.Tiny }) },
|
||||
{ ...mockTask({}, { type: 'type2' }) },
|
||||
];
|
||||
|
||||
expect(pool.getUsedCapacityByType(tasksInPool, 'type1')).toBe(2);
|
||||
expect(pool.getUsedCapacityByType(tasksInPool, 'type2')).toBe(1);
|
||||
expect(pool.getUsedCapacityByType(tasksInPool, 'type3')).toBe(0);
|
||||
});
|
||||
|
||||
test('availableCapacity returns the overall number of available workers when no task type is defined', () => {
|
||||
const pool = new WorkerCapacity({ capacity$: of(10), logger });
|
||||
|
||||
const tasksInPool = new Map([
|
||||
['1', { ...mockTask() }],
|
||||
['2', { ...mockTask({}, { cost: TaskCost.Tiny }) }],
|
||||
['3', { ...mockTask() }],
|
||||
]);
|
||||
|
||||
expect(pool.availableCapacity(tasksInPool)).toBe(7);
|
||||
});
|
||||
|
||||
test('availableCapacity returns the overall number of available workers when task type with no maxConcurrency is provided', () => {
|
||||
const pool = new WorkerCapacity({ capacity$: of(10), logger });
|
||||
|
||||
const tasksInPool = new Map([
|
||||
['1', { ...mockTask() }],
|
||||
['2', { ...mockTask({}, { cost: TaskCost.Tiny }) }],
|
||||
['3', { ...mockTask() }],
|
||||
]);
|
||||
|
||||
expect(
|
||||
pool.availableCapacity(tasksInPool, {
|
||||
type: 'type1',
|
||||
cost: TaskCost.Normal,
|
||||
createTaskRunner: jest.fn(),
|
||||
timeout: '5m',
|
||||
})
|
||||
).toBe(7);
|
||||
});
|
||||
|
||||
test('availableCapacity returns the number of available workers for the task type when task type with maxConcurrency is provided', () => {
|
||||
const pool = new WorkerCapacity({ capacity$: of(10), logger });
|
||||
|
||||
const tasksInPool = new Map([
|
||||
['1', { ...mockTask({}, { type: 'type1' }) }],
|
||||
['2', { ...mockTask({}, { cost: TaskCost.Tiny }) }],
|
||||
['3', { ...mockTask() }],
|
||||
]);
|
||||
|
||||
expect(
|
||||
pool.availableCapacity(tasksInPool, {
|
||||
type: 'type1',
|
||||
maxConcurrency: 3,
|
||||
cost: TaskCost.Normal,
|
||||
createTaskRunner: jest.fn(),
|
||||
timeout: '5m',
|
||||
})
|
||||
).toBe(2);
|
||||
});
|
||||
|
||||
describe('determineTasksToRunBasedOnCapacity', () => {
|
||||
test('runs all tasks if there are workers available', () => {
|
||||
const pool = new WorkerCapacity({ capacity$: of(10), logger });
|
||||
const tasks = [{ ...mockTask() }, { ...mockTask() }, { ...mockTask() }];
|
||||
const [tasksToRun, leftoverTasks] = pool.determineTasksToRunBasedOnCapacity(tasks, 10);
|
||||
|
||||
expect(tasksToRun).toEqual(tasks);
|
||||
expect(leftoverTasks).toEqual([]);
|
||||
});
|
||||
|
||||
test('splits tasks if there are more tasks than available workers', () => {
|
||||
const pool = new WorkerCapacity({ capacity$: of(10), logger });
|
||||
const tasks = [
|
||||
{ ...mockTask() },
|
||||
{ ...mockTask() },
|
||||
{ ...mockTask() },
|
||||
{ ...mockTask({}, { cost: TaskCost.ExtraLarge }) },
|
||||
{ ...mockTask({}, { cost: TaskCost.ExtraLarge }) },
|
||||
{ ...mockTask() },
|
||||
{ ...mockTask() },
|
||||
];
|
||||
const [tasksToRun, leftoverTasks] = pool.determineTasksToRunBasedOnCapacity(tasks, 5);
|
||||
|
||||
expect(tasksToRun).toEqual([tasks[0], tasks[1], tasks[2], tasks[3], tasks[4]]);
|
||||
expect(leftoverTasks).toEqual([tasks[5], tasks[6]]);
|
||||
});
|
||||
|
||||
test('does not run tasks if there is no capacity', () => {
|
||||
const pool = new WorkerCapacity({ capacity$: of(10), logger });
|
||||
const tasks = [{ ...mockTask() }, { ...mockTask() }, { ...mockTask() }];
|
||||
const [tasksToRun, leftoverTasks] = pool.determineTasksToRunBasedOnCapacity(tasks, 0);
|
||||
|
||||
expect(tasksToRun).toEqual([]);
|
||||
expect(leftoverTasks).toEqual(tasks);
|
||||
});
|
||||
});
|
||||
});
|
|
@@ -1,95 +0,0 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { Logger } from '@kbn/core/server';
import { TaskRunner } from '../task_running';
import { CapacityOpts, ICapacity } from './types';
import { TaskDefinition } from '../task';
import { getCapacityInWorkers } from './utils';

export class WorkerCapacity implements ICapacity {
  private workers: number = 0;
  private logger: Logger;

  constructor(opts: CapacityOpts) {
    this.logger = opts.logger;
    opts.capacity$.subscribe((capacity) => {
      // Capacity config describes the number of normal-cost tasks that can be
      // run simultaneously. This directly corresponds to the number of workers to use.
      this.workers = getCapacityInWorkers(capacity);
      this.logger.debug(
        `Task pool now using ${this.workers} as the max worker value which is based on a capacity of ${capacity}`
      );
    });
  }

  public get capacity(): number {
    return this.workers;
  }

  /**
   * Gets how many workers are currently in use.
   */
  public usedCapacity(tasksInPool: Map<string, TaskRunner>) {
    return tasksInPool.size;
  }

  /**
   * Gets % of workers in use
   */
  public usedCapacityPercentage(tasksInPool: Map<string, TaskRunner>) {
    return this.capacity ? Math.round((this.usedCapacity(tasksInPool) * 100) / this.capacity) : 100;
  }

  /**
   * Gets how many workers are currently in use by each type.
   */
  public getUsedCapacityByType(tasksInPool: TaskRunner[], type: string) {
    return tasksInPool.reduce(
      (count, runningTask) => (runningTask.definition.type === type ? ++count : count),
      0
    );
  }

  public availableCapacity(
    tasksInPool: Map<string, TaskRunner>,
    taskDefinition?: TaskDefinition | null
  ): number {
    const allAvailableCapacity = this.capacity - this.usedCapacity(tasksInPool);
    if (taskDefinition && taskDefinition.maxConcurrency) {
      // calculate the max workers that can be used for this task type
      return Math.max(
        Math.min(
          allAvailableCapacity,
          taskDefinition.maxConcurrency -
            this.getUsedCapacityByType([...tasksInPool.values()], taskDefinition.type)
        ),
        0
      );
    }

    return allAvailableCapacity;
  }

  public determineTasksToRunBasedOnCapacity(
    tasks: TaskRunner[],
    availableCapacity: number
  ): [TaskRunner[], TaskRunner[]] {
    const tasksToRun: TaskRunner[] = [];
    const leftOverTasks: TaskRunner[] = [];

    for (let i = 0; i < tasks.length; i++) {
      if (i >= availableCapacity) {
        leftOverTasks.push(tasks[i]);
      } else {
        tasksToRun.push(tasks[i]);
      }
    }

    return [tasksToRun, leftOverTasks];
  }
}
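A minimal usage sketch of the WorkerCapacity class above, mirroring the fixtures in the removed tests (mockTask and the mocked logger come from the adjacent test utilities; the return values noted in the comments follow the implementation above, not any new behaviour):

import { of } from 'rxjs';
import { loggingSystemMock } from '@kbn/core/server/mocks';
import { TaskCost } from '../task';
import { mockTask } from './test_utils';
import { WorkerCapacity } from './worker_capacity';

const logger = loggingSystemMock.create().get();
const pool = new WorkerCapacity({ capacity$: of(10), logger });

const running = new Map([
  ['1', mockTask()],
  ['2', mockTask()],
]);

pool.usedCapacity(running); // 2 – worker capacity counts running tasks, regardless of their cost
pool.availableCapacity(running); // 8 – configured capacity minus running tasks
pool.availableCapacity(running, {
  type: 'type1',
  maxConcurrency: 3,
  cost: TaskCost.Normal,
  timeout: '5m',
  createTaskRunner: jest.fn(),
}); // 3 – additionally capped by the task type's own concurrency limit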
@ -8,7 +8,7 @@
|
|||
import { schema } from '@kbn/config-schema';
|
||||
import { Client } from '@elastic/elasticsearch';
|
||||
import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
|
||||
import _, { omit } from 'lodash';
|
||||
import _ from 'lodash';
|
||||
import { first } from 'rxjs';
|
||||
|
||||
import {
|
||||
|
@ -18,7 +18,7 @@ import {
|
|||
SerializedConcreteTaskInstance,
|
||||
} from './task';
|
||||
import { elasticsearchServiceMock, savedObjectsServiceMock } from '@kbn/core/server/mocks';
|
||||
import { TaskStore, SearchOpts, AggregationOpts, taskInstanceToAttributes } from './task_store';
|
||||
import { TaskStore, SearchOpts, AggregationOpts } from './task_store';
|
||||
import { savedObjectsRepositoryMock } from '@kbn/core/server/mocks';
|
||||
import { SavedObjectAttributes, SavedObjectsErrorHelpers } from '@kbn/core/server';
|
||||
import { TaskTypeDictionary } from './task_type_dictionary';
|
||||
|
@ -292,16 +292,12 @@ describe('TaskStore', () => {
|
|||
});
|
||||
});
|
||||
|
||||
async function testFetch(
|
||||
opts?: SearchOpts,
|
||||
hits: Array<estypes.SearchHit<unknown>> = [],
|
||||
limitResponse: boolean = false
|
||||
) {
|
||||
async function testFetch(opts?: SearchOpts, hits: Array<estypes.SearchHit<unknown>> = []) {
|
||||
childEsClient.search.mockResponse({
|
||||
hits: { hits, total: hits.length },
|
||||
} as estypes.SearchResponse);
|
||||
|
||||
const result = await store.fetch(opts, limitResponse);
|
||||
const result = await store.fetch(opts);
|
||||
|
||||
expect(childEsClient.search).toHaveBeenCalledTimes(1);
|
||||
|
||||
|
@ -346,18 +342,6 @@ describe('TaskStore', () => {
|
|||
await expect(store.fetch()).rejects.toThrowErrorMatchingInlineSnapshot(`"Failure"`);
|
||||
expect(await firstErrorPromise).toMatchInlineSnapshot(`[Error: Failure]`);
|
||||
});
|
||||
|
||||
test('excludes state and params from source when excludeState is true', async () => {
|
||||
const { args } = await testFetch({}, [], true);
|
||||
expect(args).toMatchObject({
|
||||
index: 'tasky',
|
||||
body: {
|
||||
sort: [{ 'task.runAt': 'asc' }],
|
||||
query: { term: { type: 'task' } },
|
||||
},
|
||||
_source_excludes: ['task.state', 'task.params'],
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('aggregate', () => {
|
||||
|
@ -631,11 +615,10 @@ describe('TaskStore', () => {
|
|||
|
||||
describe('bulkUpdate', () => {
|
||||
let store: TaskStore;
|
||||
const logger = mockLogger();
|
||||
|
||||
beforeAll(() => {
|
||||
store = new TaskStore({
|
||||
logger,
|
||||
logger: mockLogger(),
|
||||
index: 'tasky',
|
||||
taskManagerId: '',
|
||||
serializer,
|
||||
|
@ -688,125 +671,6 @@ describe('TaskStore', () => {
|
|||
expect(mockGetValidatedTaskInstanceForUpdating).toHaveBeenCalledWith(task, {
|
||||
validate: false,
|
||||
});
|
||||
|
||||
expect(savedObjectsClient.bulkUpdate).toHaveBeenCalledWith(
|
||||
[
|
||||
{
|
||||
id: task.id,
|
||||
type: 'task',
|
||||
version: task.version,
|
||||
attributes: taskInstanceToAttributes(task, task.id),
|
||||
},
|
||||
],
|
||||
{ refresh: false }
|
||||
);
|
||||
});
|
||||
|
||||
test(`validates whenever validate:true is passed-in`, async () => {
|
||||
const task = {
|
||||
runAt: mockedDate,
|
||||
scheduledAt: mockedDate,
|
||||
startedAt: null,
|
||||
retryAt: null,
|
||||
id: 'task:324242',
|
||||
params: { hello: 'world' },
|
||||
state: { foo: 'bar' },
|
||||
taskType: 'report',
|
||||
attempts: 3,
|
||||
status: 'idle' as TaskStatus,
|
||||
version: '123',
|
||||
ownerId: null,
|
||||
traceparent: '',
|
||||
};
|
||||
|
||||
savedObjectsClient.bulkUpdate.mockResolvedValue({
|
||||
saved_objects: [
|
||||
{
|
||||
id: '324242',
|
||||
type: 'task',
|
||||
attributes: {
|
||||
...task,
|
||||
state: '{"foo":"bar"}',
|
||||
params: '{"hello":"world"}',
|
||||
},
|
||||
references: [],
|
||||
version: '123',
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
await store.bulkUpdate([task], { validate: true });
|
||||
|
||||
expect(mockGetValidatedTaskInstanceForUpdating).toHaveBeenCalledWith(task, {
|
||||
validate: true,
|
||||
});
|
||||
|
||||
expect(savedObjectsClient.bulkUpdate).toHaveBeenCalledWith(
|
||||
[
|
||||
{
|
||||
id: task.id,
|
||||
type: 'task',
|
||||
version: task.version,
|
||||
attributes: taskInstanceToAttributes(task, task.id),
|
||||
},
|
||||
],
|
||||
{ refresh: false }
|
||||
);
|
||||
});
|
||||
|
||||
test(`logs warning and doesn't validate whenever excludeLargeFields option is passed-in`, async () => {
|
||||
const task = {
|
||||
runAt: mockedDate,
|
||||
scheduledAt: mockedDate,
|
||||
startedAt: null,
|
||||
retryAt: null,
|
||||
id: 'task:324242',
|
||||
params: { hello: 'world' },
|
||||
state: { foo: 'bar' },
|
||||
taskType: 'report',
|
||||
attempts: 3,
|
||||
status: 'idle' as TaskStatus,
|
||||
version: '123',
|
||||
ownerId: null,
|
||||
traceparent: '',
|
||||
};
|
||||
|
||||
savedObjectsClient.bulkUpdate.mockResolvedValue({
|
||||
saved_objects: [
|
||||
{
|
||||
id: '324242',
|
||||
type: 'task',
|
||||
attributes: {
|
||||
...task,
|
||||
state: '{"foo":"bar"}',
|
||||
params: '{"hello":"world"}',
|
||||
},
|
||||
references: [],
|
||||
version: '123',
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
await store.bulkUpdate([task], { validate: true, excludeLargeFields: true });
|
||||
|
||||
expect(logger.warn).toHaveBeenCalledWith(
|
||||
`Skipping validation for bulk update because excludeLargeFields=true.`
|
||||
);
|
||||
expect(mockGetValidatedTaskInstanceForUpdating).toHaveBeenCalledWith(task, {
|
||||
validate: false,
|
||||
});
|
||||
|
||||
expect(savedObjectsClient.bulkUpdate).toHaveBeenCalledWith(
|
||||
[
|
||||
{
|
||||
id: task.id,
|
||||
type: 'task',
|
||||
version: task.version,
|
||||
attributes: omit(taskInstanceToAttributes(task, task.id), ['state', 'params']),
|
||||
},
|
||||
],
|
||||
{ refresh: false }
|
||||
);
|
||||
});
|
||||
|
||||
test('pushes error from saved objects client to errors$', async () => {
|
||||
|
|
|
@ -84,11 +84,6 @@ export interface FetchResult {
|
|||
versionMap: Map<string, ConcreteTaskInstanceVersion>;
|
||||
}
|
||||
|
||||
export interface BulkUpdateOpts {
|
||||
validate: boolean;
|
||||
excludeLargeFields?: boolean;
|
||||
}
|
||||
|
||||
export type BulkUpdateResult = Result<
|
||||
ConcreteTaskInstance,
|
||||
{ type: string; id: string; error: SavedObjectError }
|
||||
|
@ -113,7 +108,6 @@ export class TaskStore {
|
|||
public readonly taskManagerId: string;
|
||||
public readonly errors$ = new Subject<Error>();
|
||||
public readonly taskValidator: TaskValidator;
|
||||
private readonly logger: Logger;
|
||||
|
||||
private esClient: ElasticsearchClient;
|
||||
private esClientWithoutRetries: ElasticsearchClient;
|
||||
|
@ -140,7 +134,6 @@ export class TaskStore {
|
|||
this.serializer = opts.serializer;
|
||||
this.savedObjectsRepository = opts.savedObjectsRepository;
|
||||
this.adHocTaskCounter = opts.adHocTaskCounter;
|
||||
this.logger = opts.logger;
|
||||
this.taskValidator = new TaskValidator({
|
||||
logger: opts.logger,
|
||||
definitions: opts.definitions,
|
||||
|
@ -239,13 +232,15 @@ export class TaskStore {
|
|||
* Fetches a list of scheduled tasks with default sorting.
|
||||
*
|
||||
* @param opts - The query options used to filter tasks
|
||||
* @param limitResponse - Whether to exclude the task state and params from the source for a smaller response payload
|
||||
*/
|
||||
public async fetch(
|
||||
{ sort = [{ 'task.runAt': 'asc' }], ...opts }: SearchOpts = {},
|
||||
limitResponse: boolean = false
|
||||
): Promise<FetchResult> {
|
||||
return this.search({ ...opts, sort }, limitResponse);
|
||||
public async fetch({
|
||||
sort = [{ 'task.runAt': 'asc' }],
|
||||
...opts
|
||||
}: SearchOpts = {}): Promise<FetchResult> {
|
||||
return this.search({
|
||||
...opts,
|
||||
sort,
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
|
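For illustration, the limitResponse flag removed in the hunk above worked roughly like this (taskStore here stands for any constructed TaskStore instance, an assumption made for the sketch): when the flag was true, the underlying search passed _source_excludes for task.state and task.params to shrink the response payload.

// before the revert: the optional second argument excluded task.state and task.params
await taskStore.fetch({ sort: [{ 'task.runAt': 'asc' }] }, true);

// after the revert: fetch accepts only the search options
await taskStore.fetch({ sort: [{ 'task.runAt': 'asc' }] });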
@ -301,23 +296,13 @@ export class TaskStore {
|
|||
*/
|
||||
public async bulkUpdate(
|
||||
docs: ConcreteTaskInstance[],
|
||||
{ validate, excludeLargeFields = false }: BulkUpdateOpts
|
||||
options: { validate: boolean }
|
||||
): Promise<BulkUpdateResult[]> {
|
||||
// if we're excluding large fields (state and params), we cannot apply validation so log a warning
|
||||
if (validate && excludeLargeFields) {
|
||||
validate = false;
|
||||
this.logger.warn(`Skipping validation for bulk update because excludeLargeFields=true.`);
|
||||
}
|
||||
|
||||
const attributesByDocId = docs.reduce((attrsById, doc) => {
|
||||
const taskInstance = this.taskValidator.getValidatedTaskInstanceForUpdating(doc, {
|
||||
validate,
|
||||
validate: options.validate,
|
||||
});
|
||||
const taskAttributes = taskInstanceToAttributes(taskInstance, doc.id);
|
||||
attrsById.set(
|
||||
doc.id,
|
||||
excludeLargeFields ? omit(taskAttributes, 'state', 'params') : taskAttributes
|
||||
);
|
||||
attrsById.set(doc.id, taskInstanceToAttributes(taskInstance, doc.id));
|
||||
return attrsById;
|
||||
}, new Map());
|
||||
|
||||
|
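The excludeLargeFields branch removed above can be summarised with a short sketch (again assuming a constructed TaskStore instance named taskStore): validation was skipped with a warning and state/params were omitted from the persisted attributes.

await taskStore.bulkUpdate(tasks, { validate: true, excludeLargeFields: true });
// logs: "Skipping validation for bulk update because excludeLargeFields=true."
// and persists omit(taskInstanceToAttributes(task, task.id), ['state', 'params']) for each doc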
@ -357,7 +342,7 @@ export class TaskStore {
|
|||
),
|
||||
});
|
||||
const result = this.taskValidator.getValidatedTaskInstanceFromReading(taskInstance, {
|
||||
validate,
|
||||
validate: options.validate,
|
||||
});
|
||||
return asOk(result);
|
||||
});
|
||||
|
@ -504,20 +489,18 @@ export class TaskStore {
|
|||
}
|
||||
}
|
||||
|
||||
private async search(
|
||||
opts: SearchOpts = {},
|
||||
limitResponse: boolean = false
|
||||
): Promise<FetchResult> {
|
||||
private async search(opts: SearchOpts = {}): Promise<FetchResult> {
|
||||
const { query } = ensureQueryOnlyReturnsTaskObjects(opts);
|
||||
|
||||
try {
|
||||
const result = await this.esClientWithoutRetries.search<SavedObjectsRawDoc['_source']>({
|
||||
index: this.index,
|
||||
ignore_unavailable: true,
|
||||
body: { ...opts, query },
|
||||
...(limitResponse ? { _source_excludes: ['task.state', 'task.params'] } : {}),
|
||||
body: {
|
||||
...opts,
|
||||
query,
|
||||
},
|
||||
});
|
||||
|
||||
const {
|
||||
hits: { hits: tasks },
|
||||
} = result;
|
||||
|
@ -644,10 +627,7 @@ export function correctVersionConflictsForContinuation(
|
|||
return maxDocs && versionConflicts + updated > maxDocs ? maxDocs - updated : versionConflicts;
|
||||
}
|
||||
|
||||
export function taskInstanceToAttributes(
|
||||
doc: TaskInstance,
|
||||
id: string
|
||||
): SerializedConcreteTaskInstance {
|
||||
function taskInstanceToAttributes(doc: TaskInstance, id: string): SerializedConcreteTaskInstance {
|
||||
return {
|
||||
...omit(doc, 'id', 'version'),
|
||||
params: JSON.stringify(doc.params || {}),
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
*/
|
||||
|
||||
import { get } from 'lodash';
|
||||
import { RunContext, TaskCost, TaskDefinition, TaskPriority } from './task';
|
||||
import { RunContext, TaskDefinition, TaskPriority } from './task';
|
||||
import { mockLogger } from './test_utils';
|
||||
import {
|
||||
sanitizeTaskDefinitions,
|
||||
|
@ -53,7 +53,6 @@ describe('taskTypeDictionary', () => {
|
|||
const logger = mockLogger();
|
||||
|
||||
beforeEach(() => {
|
||||
jest.resetAllMocks();
|
||||
definitions = new TaskTypeDictionary(logger);
|
||||
});
|
||||
|
||||
|
@ -65,7 +64,6 @@ describe('taskTypeDictionary', () => {
|
|||
expect(result).toMatchInlineSnapshot(`
|
||||
Array [
|
||||
Object {
|
||||
"cost": 2,
|
||||
"createTaskRunner": [Function],
|
||||
"description": "one super cool task",
|
||||
"timeout": "5m",
|
||||
|
@ -73,7 +71,6 @@ describe('taskTypeDictionary', () => {
|
|||
"type": "test_task_type_0",
|
||||
},
|
||||
Object {
|
||||
"cost": 2,
|
||||
"createTaskRunner": [Function],
|
||||
"description": "one super cool task",
|
||||
"timeout": "5m",
|
||||
|
@ -81,7 +78,6 @@ describe('taskTypeDictionary', () => {
|
|||
"type": "test_task_type_1",
|
||||
},
|
||||
Object {
|
||||
"cost": 2,
|
||||
"createTaskRunner": [Function],
|
||||
"description": "one super cool task",
|
||||
"timeout": "5m",
|
||||
|
@ -228,7 +224,6 @@ describe('taskTypeDictionary', () => {
|
|||
createTaskRunner: expect.any(Function),
|
||||
maxConcurrency: 2,
|
||||
priority: 1,
|
||||
cost: 2,
|
||||
timeout: '5m',
|
||||
title: 'foo',
|
||||
type: 'foo',
|
||||
|
@ -254,44 +249,6 @@ describe('taskTypeDictionary', () => {
|
|||
);
|
||||
});
|
||||
|
||||
it('uses task cost if specified', () => {
|
||||
definitions.registerTaskDefinitions({
|
||||
foo: {
|
||||
title: 'foo',
|
||||
maxConcurrency: 2,
|
||||
cost: TaskCost.ExtraLarge,
|
||||
createTaskRunner: jest.fn(),
|
||||
},
|
||||
});
|
||||
expect(definitions.get('foo')).toEqual({
|
||||
createTaskRunner: expect.any(Function),
|
||||
maxConcurrency: 2,
|
||||
cost: 10,
|
||||
timeout: '5m',
|
||||
title: 'foo',
|
||||
type: 'foo',
|
||||
});
|
||||
});
|
||||
|
||||
it('does not register task with invalid cost schema', () => {
|
||||
definitions.registerTaskDefinitions({
|
||||
foo: {
|
||||
title: 'foo',
|
||||
maxConcurrency: 2,
|
||||
cost: 23,
|
||||
createTaskRunner: jest.fn(),
|
||||
},
|
||||
});
|
||||
expect(logger.error).toHaveBeenCalledWith(
|
||||
`Could not sanitize task definitions: Invalid cost \"23\". Cost must be one of Tiny => 1,Normal => 2,ExtraLarge => 10`
|
||||
);
|
||||
expect(() => {
|
||||
definitions.get('foo');
|
||||
}).toThrowErrorMatchingInlineSnapshot(
|
||||
`"Unsupported task type \\"foo\\". Supported types are "`
|
||||
);
|
||||
});
|
||||
|
||||
it('throws error when registering duplicate task type', () => {
|
||||
definitions.registerTaskDefinitions({
|
||||
foo: {
|
||||
|
|
|
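As the removed tests above spell out, a task definition could declare an explicit cost, and only the TaskCost enum values (Tiny = 1, Normal = 2, ExtraLarge = 10) passed the schema; a registration sketch using the dictionary from those tests:

definitions.registerTaskDefinitions({
  foo: {
    title: 'foo',
    maxConcurrency: 2,
    cost: TaskCost.ExtraLarge, // one of TaskCost.Tiny (1), Normal (2) or ExtraLarge (10)
    createTaskRunner: jest.fn(),
  },
});
// definitions.get('foo').cost === 10; an out-of-range value such as 23 fails
// sanitization and the task type is not registered.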
@ -7,13 +7,7 @@
|
|||
|
||||
import { ObjectType } from '@kbn/config-schema';
|
||||
import { Logger } from '@kbn/core/server';
|
||||
import {
|
||||
TaskDefinition,
|
||||
taskDefinitionSchema,
|
||||
TaskRunCreatorFunction,
|
||||
TaskPriority,
|
||||
TaskCost,
|
||||
} from './task';
|
||||
import { TaskDefinition, taskDefinitionSchema, TaskRunCreatorFunction, TaskPriority } from './task';
|
||||
import { CONCURRENCY_ALLOW_LIST_BY_TASK_TYPE } from './constants';
|
||||
|
||||
/**
|
||||
|
@ -56,10 +50,6 @@ export interface TaskRegisterDefinition {
|
|||
* claimed before low priority
|
||||
*/
|
||||
priority?: TaskPriority;
|
||||
/**
|
||||
* An optional definition of the cost associated with running the task.
|
||||
*/
|
||||
cost?: TaskCost;
|
||||
/**
|
||||
* An optional more detailed description of what this task does.
|
||||
*/
|
||||
|
|
|
@ -174,8 +174,7 @@ function getMockMonitoredHealth(overrides = {}): MonitoredHealth {
|
|||
timestamp: new Date().toISOString(),
|
||||
status: HealthStatus.OK,
|
||||
value: {
|
||||
capacity: { config: 10, as_cost: 20, as_workers: 10 },
|
||||
claim_strategy: 'default',
|
||||
max_workers: 10,
|
||||
poll_interval: 3000,
|
||||
request_capacity: 1000,
|
||||
monitored_aggregated_stats_refresh_rate: 5000,
|
||||
|
@ -194,19 +193,16 @@ function getMockMonitoredHealth(overrides = {}): MonitoredHealth {
|
|||
status: HealthStatus.OK,
|
||||
value: {
|
||||
count: 4,
|
||||
cost: 8,
|
||||
task_types: {
|
||||
actions_telemetry: { count: 2, cost: 4, status: { idle: 2 } },
|
||||
alerting_telemetry: { count: 1, cost: 2, status: { idle: 1 } },
|
||||
session_cleanup: { count: 1, cost: 2, status: { idle: 1 } },
|
||||
actions_telemetry: { count: 2, status: { idle: 2 } },
|
||||
alerting_telemetry: { count: 1, status: { idle: 1 } },
|
||||
session_cleanup: { count: 1, status: { idle: 1 } },
|
||||
},
|
||||
schedule: [],
|
||||
overdue: 0,
|
||||
overdue_cost: 0,
|
||||
overdue_non_recurring: 0,
|
||||
estimatedScheduleDensity: [],
|
||||
non_recurring: 20,
|
||||
non_recurring_cost: 40,
|
||||
owner_ids: 2,
|
||||
estimated_schedule_density: [],
|
||||
capacity_requirements: {
|
||||
|
|
|
@ -25,9 +25,7 @@
|
|||
"@kbn/alerting-state-types",
|
||||
"@kbn/core-saved-objects-api-server",
|
||||
"@kbn/logging",
|
||||
"@kbn/core-lifecycle-server",
|
||||
"@kbn/serverless",
|
||||
"@kbn/cloud-plugin"
|
||||
"@kbn/core-lifecycle-server"
|
||||
],
|
||||
"exclude": ["target/**/*"]
|
||||
}
|
||||
|
|
|
@ -140,12 +140,7 @@ export default function ({ getService }: FtrProviderContext) {
|
|||
},
|
||||
},
|
||||
request_capacity: 1000,
|
||||
capacity: {
|
||||
config: 10,
|
||||
as_workers: 10,
|
||||
as_cost: 20,
|
||||
},
|
||||
claim_strategy: 'default',
|
||||
max_workers: 10,
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
@ -140,12 +140,7 @@ export default function ({ getService }: FtrProviderContext) {
|
|||
},
|
||||
},
|
||||
request_capacity: 1000,
|
||||
capacity: {
|
||||
config: 10,
|
||||
as_workers: 10,
|
||||
as_cost: 20,
|
||||
},
|
||||
claim_strategy: 'unsafe_mget',
|
||||
max_workers: 10,
|
||||
});
|
||||
});
|
||||
|
||||
|
|