mirror of
https://github.com/elastic/kibana.git
synced 2025-06-27 18:51:07 -04:00
Fixes https://github.com/elastic/kibana/issues/151938

In this PR, I'm rewriting the Task Manager poller so it doesn't run concurrently when timeouts occur, while also fixing the issue where polling requests would pile up when polling takes time.

To support this, I've also made the following changes:
- Removed the observable monitor and the `xpack.task_manager.max_poll_inactivity_cycles` setting.
- Made the task store `search` and `updateByQuery` functions have no retries. This prevents the request from retrying 5x whenever a timeout occurs, which caused each call to take up to 2 1/2 minutes before Kibana saw the error (now down to 30s each). We have polling to manage retries in these situations.
- Switched the task poller tests to use `sinon` for faking timers.
- Removed the `assertStillInSetup` checks on plugin setup. They felt like a maintenance burden that wasn't necessary to fix with my code changes.

The main code changes are within these files (to review thoroughly so the polling cycle doesn't suddenly stop):
- x-pack/plugins/task_manager/server/polling/task_poller.ts
- x-pack/plugins/task_manager/server/polling_lifecycle.ts (easier to review if you disregard whitespace `?w=1`)

## To verify
1. Tasks run normally (create a rule or something that goes through task manager regularly).
2. When the update by query takes a while, the request is cancelled after 30s or the time manually configured.
3. When the search for claimed tasks query takes a while, the request is cancelled after 30s or the time manually configured.
**Tips:** <details><summary>how to slowdown search for claimed task queries</summary> ``` diff --git a/x-pack/plugins/task_manager/server/queries/task_claiming.ts b/x-pack/plugins/task_manager/server/queries/task_claiming.ts index 07042650a37..2caefd63672 100644 --- a/x-pack/plugins/task_manager/server/queries/task_claiming.ts +++ b/x-pack/plugins/task_manager/server/queries/task_claiming.ts @@ -247,7 +247,7 @@ export class TaskClaiming { taskTypes, }); - const docs = tasksUpdated > 0 ? await this.sweepForClaimedTasks(taskTypes, size) : []; + const docs = await this.sweepForClaimedTasks(taskTypes, size); this.emitEvents(docs.map((doc) => asTaskClaimEvent(doc.id, asOk(doc)))); @@ -346,6 +346,13 @@ export class TaskClaiming { size, sort: SortByRunAtAndRetryAt, seq_no_primary_term: true, + aggs: { + delay: { + shard_delay: { + value: '40s', + }, + }, + }, }); return docs; ``` </details> <details><summary>how to slow down update by queries</summary> Not the cleanest way but you'll see occasional request timeouts from the updateByQuery calls. I had more luck creating rules running every 1s. ``` diff --git a/x-pack/plugins/task_manager/server/task_store.ts b/x-pack/plugins/task_manager/server/task_store.ts index a06ee7b918a..07aa81e5388 100644 --- a/x-pack/plugins/task_manager/server/task_store.ts +++ b/x-pack/plugins/task_manager/server/task_store.ts @@ -126,6 +126,7 @@ export class TaskStore { // Timeouts are retried and make requests timeout after (requestTimeout * (1 + maxRetries)) // The poller doesn't need retry logic because it will try again at the next polling cycle maxRetries: 0, + requestTimeout: 900, }); } @@ -458,6 +459,7 @@ export class TaskStore { ignore_unavailable: true, refresh: true, conflicts: 'proceed', + requests_per_second: 1, body: { ...opts, max_docs, ``` </details> --------- Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com>
300 lines
11 KiB
TypeScript
300 lines
11 KiB
TypeScript
/*
|
|
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
|
* or more contributor license agreements. Licensed under the Elastic License
|
|
* 2.0; you may not use this file except in compliance with the Elastic License
|
|
* 2.0.
|
|
*/
|
|
|
|
import { combineLatest, Observable, Subject } from 'rxjs';
|
|
import { map, distinctUntilChanged } from 'rxjs/operators';
|
|
import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
|
|
import { UsageCollectionSetup, UsageCounter } from '@kbn/usage-collection-plugin/server';
|
|
import {
|
|
PluginInitializerContext,
|
|
Plugin,
|
|
CoreSetup,
|
|
Logger,
|
|
CoreStart,
|
|
ServiceStatusLevels,
|
|
CoreStatus,
|
|
} from '@kbn/core/server';
|
|
import { TaskPollingLifecycle } from './polling_lifecycle';
|
|
import { TaskManagerConfig } from './config';
|
|
import { createInitialMiddleware, addMiddlewareToChain, Middleware } from './lib/middleware';
|
|
import { removeIfExists } from './lib/remove_if_exists';
|
|
import { setupSavedObjects } from './saved_objects';
|
|
import { TaskDefinitionRegistry, TaskTypeDictionary, REMOVED_TYPES } from './task_type_dictionary';
|
|
import { AggregationOpts, FetchResult, SearchOpts, TaskStore } from './task_store';
|
|
import { createManagedConfiguration } from './lib/create_managed_configuration';
|
|
import { TaskScheduling } from './task_scheduling';
|
|
import { backgroundTaskUtilizationRoute, healthRoute } from './routes';
|
|
import { createMonitoringStats, MonitoringStats } from './monitoring';
|
|
import { EphemeralTaskLifecycle } from './ephemeral_task_lifecycle';
|
|
import { EphemeralTask, ConcreteTaskInstance } from './task';
|
|
import { registerTaskManagerUsageCollector } from './usage';
|
|
import { TASK_MANAGER_INDEX } from './constants';
|
|
import { AdHocTaskCounter } from './lib/adhoc_task_counter';
|
|
|
|
/**
 * Contract returned by `TaskManagerPlugin.setup()` and exposed to other plugins
 * during their own setup phase.
 */
export interface TaskManagerSetupContract {
  /**
   * Name of the task manager index.
   *
   * @deprecated
   */
  index: string;

  /**
   * Adds a middleware to the chain held by the plugin.
   * @param middleware - The middleware to append to the existing chain
   */
  addMiddleware: (middleware: Middleware) => void;

  /**
   * Lets consumers register their task definitions with the system.
   * @param taskDefinitions - The Kibana task definitions dictionary
   */
  registerTaskDefinitions: (taskDefinitions: TaskDefinitionRegistry) => void;
}
|
|
|
|
/**
 * Contract returned by `TaskManagerPlugin.start()`.
 *
 * It is composed of:
 *  - the scheduling operations picked from `TaskScheduling`,
 *  - the read/remove operations picked from `TaskStore`,
 *  - a few plugin-level helpers declared inline below.
 */
export type TaskManagerStartContract = Pick<
  TaskStore,
  'fetch' | 'aggregate' | 'get' | 'remove' | 'bulkRemove'
> &
  Pick<
    TaskScheduling,
    | 'schedule'
    | 'bulkSchedule'
    | 'ensureScheduled'
    | 'runSoon'
    | 'ephemeralRunNow'
    | 'bulkEnable'
    | 'bulkDisable'
    | 'bulkUpdateSchedules'
  > & {
    /** Same signature as `TaskStore.remove`. */
    removeIfExists: TaskStore['remove'];
    /** Whether ephemeral tasks can be used on this node. */
    supportsEphemeralTasks: () => boolean;
    /** Lists the task types known to the plugin. */
    getRegisteredTypes: () => string[];
  };
|
|
|
|
/**
 * Task Manager server plugin.
 *
 * `setup()` wires up saved objects, the health and background-task-utilization
 * routes, status reporting and usage collection, and hands back a contract for
 * registering task definitions and middleware.
 *
 * `start()` builds the task store, the (optional) polling and ephemeral task
 * lifecycles, the monitoring stats stream and the scheduling API, and hands
 * back the contract used to schedule, fetch and remove tasks.
 */
export class TaskManagerPlugin
  implements Plugin<TaskManagerSetupContract, TaskManagerStartContract>
{
  // Both lifecycles are only created in start() when this node runs background tasks.
  private taskPollingLifecycle?: TaskPollingLifecycle;
  private ephemeralTaskLifecycle?: EphemeralTaskLifecycle;
  // Assigned in setup() from the Kibana instance UUID.
  private taskManagerId?: string;
  // Stays undefined when the optional usageCollection plugin is not provided.
  private usageCounter?: UsageCounter;
  private config: TaskManagerConfig;
  private logger: Logger;
  private definitions: TaskTypeDictionary;
  private middleware: Middleware = createInitialMiddleware();
  // Assigned in setup(); consumed in start().
  private elasticsearchAndSOAvailability$?: Observable<boolean>;
  // Fed in start() by createMonitoringStats and read by the routes registered in setup().
  private monitoringStats$ = new Subject<MonitoringStats>();
  private shouldRunBackgroundTasks: boolean;
  private readonly kibanaVersion: PluginInitializerContext['env']['packageInfo']['version'];
  private adHocTaskCounter: AdHocTaskCounter;

  /**
   * @param initContext - Plugin initializer context; the `private readonly`
   *   parameter property stores it on the instance, so no explicit assignment
   *   is needed in the body.
   */
  constructor(private readonly initContext: PluginInitializerContext) {
    this.logger = initContext.logger.get();
    this.config = initContext.config.get<TaskManagerConfig>();
    this.definitions = new TaskTypeDictionary(this.logger);
    this.kibanaVersion = initContext.env.packageInfo.version;
    this.shouldRunBackgroundTasks = initContext.node.roles.backgroundTasks;
    this.adHocTaskCounter = new AdHocTaskCounter();
  }

  /**
   * Setup phase of the plugin.
   *
   * @param core - Kibana core setup services
   * @param plugins - Optional dependencies; `usageCollection` may be absent
   * @returns The setup contract (index name, middleware and task definition registration)
   * @throws Error when the Kibana instance UUID is missing
   */
  public setup(
    core: CoreSetup,
    plugins: { usageCollection?: UsageCollectionSetup }
  ): TaskManagerSetupContract {
    this.elasticsearchAndSOAvailability$ = getElasticsearchAndSOAvailability(core.status.core$);

    setupSavedObjects(core.savedObjects, this.config);
    this.taskManagerId = this.initContext.env.instanceUuid;

    // Guard clause: nothing below can work without an instance identifier.
    if (!this.taskManagerId) {
      this.logger.error(
        `TaskManager is unable to start as there the Kibana UUID is invalid (value of the "server.uuid" configuration is ${this.taskManagerId})`
      );
      throw new Error(`TaskManager is unable to start as Kibana has no valid UUID assigned to it.`);
    }
    this.logger.info(`TaskManager is identified by the Kibana UUID: ${this.taskManagerId}`);

    const startServicesPromise = core.getStartServices().then(([coreServices]) => ({
      elasticsearch: coreServices.elasticsearch,
    }));
    // Shared by both routes: resolves the cluster client once start services are available.
    const getClusterClient = () =>
      startServicesPromise.then(({ elasticsearch }) => elasticsearch.client);

    this.usageCounter = plugins.usageCollection?.createUsageCounter(`taskManager`);

    // Routes
    const router = core.http.createRouter();
    const { serviceStatus$, monitoredHealth$ } = healthRoute({
      router,
      monitoringStats$: this.monitoringStats$,
      logger: this.logger,
      taskManagerId: this.taskManagerId,
      config: this.config,
      // NOTE(review): usageCounter is undefined when usageCollection is not provided;
      // the assertion only satisfies the compiler.
      usageCounter: this.usageCounter!,
      kibanaVersion: this.kibanaVersion,
      kibanaIndexName: core.savedObjects.getDefaultIndex(),
      getClusterClient,
      shouldRunTasks: this.shouldRunBackgroundTasks,
      docLinks: core.docLinks,
    });
    const monitoredUtilization$ = backgroundTaskUtilizationRoute({
      router,
      monitoringStats$: this.monitoringStats$,
      logger: this.logger,
      taskManagerId: this.taskManagerId,
      config: this.config,
      // NOTE(review): same caveat as above regarding a possibly undefined usageCounter.
      usageCounter: this.usageCounter!,
      kibanaVersion: this.kibanaVersion,
      kibanaIndexName: core.savedObjects.getDefaultIndex(),
      getClusterClient,
    });

    core.status.derivedStatus$.subscribe((status) =>
      this.logger.debug(`status core.status.derivedStatus now set to ${status.level}`)
    );
    serviceStatus$.subscribe((status) =>
      this.logger.debug(`status serviceStatus now set to ${status.level}`)
    );

    // here is where the system status is updated:
    // report whichever of the derived status and the service status has the higher level
    core.status.set(
      combineLatest([core.status.derivedStatus$, serviceStatus$]).pipe(
        map(([derivedStatus, serviceStatus]) =>
          serviceStatus.level > derivedStatus.level ? serviceStatus : derivedStatus
        )
      )
    );

    const usageCollection = plugins.usageCollection;
    if (usageCollection) {
      registerTaskManagerUsageCollector(
        usageCollection,
        monitoredHealth$,
        monitoredUtilization$,
        this.config.ephemeral_tasks.enabled,
        this.config.ephemeral_tasks.request_capacity,
        this.config.unsafe.exclude_task_types
      );
    }

    if (this.config.unsafe.exclude_task_types.length) {
      this.logger.warn(
        `Excluding task types from execution: ${this.config.unsafe.exclude_task_types.join(', ')}`
      );
    }

    return {
      index: TASK_MANAGER_INDEX,
      addMiddleware: (middleware: Middleware) => {
        this.middleware = addMiddlewareToChain(this.middleware, middleware);
      },
      registerTaskDefinitions: (taskDefinition: TaskDefinitionRegistry) => {
        this.definitions.registerTaskDefinitions(taskDefinition);
      },
    };
  }

  /**
   * Start phase of the plugin.
   *
   * Relies on state assigned in `setup()` (`taskManagerId` and
   * `elasticsearchAndSOAvailability$`), hence the non-null assertions on those fields.
   *
   * @returns The start contract exposing the task store and scheduling operations
   */
  public start({
    savedObjects,
    elasticsearch,
    executionContext,
  }: CoreStart): TaskManagerStartContract {
    const savedObjectsRepository = savedObjects.createInternalRepository(['task']);

    const serializer = savedObjects.createSerializer();
    const taskStore = new TaskStore({
      serializer,
      savedObjectsRepository,
      esClient: elasticsearch.client.asInternalUser,
      index: TASK_MANAGER_INDEX,
      definitions: this.definitions,
      taskManagerId: `kibana:${this.taskManagerId!}`,
      adHocTaskCounter: this.adHocTaskCounter,
    });

    const managedConfiguration = createManagedConfiguration({
      logger: this.logger,
      errors$: taskStore.errors$,
      startingMaxWorkers: this.config.max_workers,
      startingPollInterval: this.config.poll_interval,
    });

    // Only poll for tasks if configured to run tasks
    if (this.shouldRunBackgroundTasks) {
      this.taskPollingLifecycle = new TaskPollingLifecycle({
        config: this.config,
        definitions: this.definitions,
        unusedTypes: REMOVED_TYPES,
        logger: this.logger,
        executionContext,
        taskStore,
        usageCounter: this.usageCounter,
        middleware: this.middleware,
        elasticsearchAndSOAvailability$: this.elasticsearchAndSOAvailability$!,
        ...managedConfiguration,
      });

      // The ephemeral lifecycle shares the pool and event stream of the polling lifecycle.
      this.ephemeralTaskLifecycle = new EphemeralTaskLifecycle({
        config: this.config,
        definitions: this.definitions,
        logger: this.logger,
        executionContext,
        middleware: this.middleware,
        elasticsearchAndSOAvailability$: this.elasticsearchAndSOAvailability$!,
        pool: this.taskPollingLifecycle.pool,
        lifecycleEvent: this.taskPollingLifecycle.events,
      });
    }

    // Forward every monitoring stat into the subject handed to the routes during setup().
    createMonitoringStats(
      taskStore,
      this.elasticsearchAndSOAvailability$!,
      this.config,
      managedConfiguration,
      this.logger,
      this.adHocTaskCounter,
      this.taskPollingLifecycle,
      this.ephemeralTaskLifecycle
    ).subscribe((stat) => this.monitoringStats$.next(stat));

    const taskScheduling = new TaskScheduling({
      logger: this.logger,
      taskStore,
      middleware: this.middleware,
      ephemeralTaskLifecycle: this.ephemeralTaskLifecycle,
      taskManagerId: taskStore.taskManagerId,
    });

    return {
      fetch: (opts: SearchOpts): Promise<FetchResult> => taskStore.fetch(opts),
      aggregate: (opts: AggregationOpts): Promise<estypes.SearchResponse<ConcreteTaskInstance>> =>
        taskStore.aggregate(opts),
      get: (id: string) => taskStore.get(id),
      remove: (id: string) => taskStore.remove(id),
      bulkRemove: (ids: string[]) => taskStore.bulkRemove(ids),
      removeIfExists: (id: string) => removeIfExists(taskStore, id),
      schedule: (...args) => taskScheduling.schedule(...args),
      bulkSchedule: (...args) => taskScheduling.bulkSchedule(...args),
      ensureScheduled: (...args) => taskScheduling.ensureScheduled(...args),
      runSoon: (...args) => taskScheduling.runSoon(...args),
      bulkEnable: (...args) => taskScheduling.bulkEnable(...args),
      bulkDisable: (...args) => taskScheduling.bulkDisable(...args),
      bulkUpdateSchedules: (...args) => taskScheduling.bulkUpdateSchedules(...args),
      ephemeralRunNow: (task: EphemeralTask) => taskScheduling.ephemeralRunNow(task),
      supportsEphemeralTasks: () =>
        this.config.ephemeral_tasks.enabled && this.shouldRunBackgroundTasks,
      getRegisteredTypes: () => this.definitions.getAllTypes(),
    };
  }
}
|
|
|
|
/**
 * Turns the core status stream into a boolean stream that is `true` only while
 * both the Elasticsearch and the saved objects services report the
 * `available` level. Consecutive identical values are suppressed.
 *
 * @param core$ - Stream of core service statuses
 * @returns Stream emitting whenever the combined availability changes
 */
export function getElasticsearchAndSOAvailability(
  core$: Observable<CoreStatus>
): Observable<boolean> {
  const areBothAvailable = ({ elasticsearch, savedObjects }: CoreStatus): boolean => {
    if (elasticsearch.level !== ServiceStatusLevels.available) {
      return false;
    }
    return savedObjects.level === ServiceStatusLevels.available;
  };

  const availability$ = core$.pipe(
    map((status) => areBothAvailable(status)),
    distinctUntilChanged()
  );
  return availability$;
}
|