mirror of
https://github.com/elastic/kibana.git
synced 2025-04-23 09:19:04 -04:00
[ML] Fixes polling for blocked anomaly detection jobs (#178246)
Fixes two issues:
- When a job is in a blocked state (resetting, deleting, reverting) but the underlying task [cannot be found](https://github.com/elastic/elasticsearch/issues/105928), the task polling fails to start correctly and instead enters a loop where the tasks are checked as fast as possible.
- Some tasks can legitimately take a long time to run, but we still poll at the same 2 second rate.

This PR fixes the feedback loop and adds a check so that, once a poll has been running for over a minute, the poll interval is increased to 2 minutes. Related to https://github.com/elastic/kibana/issues/171626
This commit is contained in:
parent
834371ebb2
commit
a66d42b50c
3 changed files with 31 additions and 11 deletions
|
@ -7,6 +7,8 @@
|
|||
|
||||
export const DEFAULT_REFRESH_INTERVAL_MS = 30000;
|
||||
export const MINIMUM_REFRESH_INTERVAL_MS = 1000;
|
||||
export const DELETING_JOBS_REFRESH_INTERVAL_MS = 2000;
|
||||
export const BLOCKED_JOBS_REFRESH_INTERVAL_MS = 2000;
|
||||
export const BLOCKED_JOBS_REFRESH_INTERVAL_SLOW_MS = 120000; // 2mins
|
||||
export const BLOCKED_JOBS_REFRESH_THRESHOLD_MS = 60000; // 1min
|
||||
export const RESETTING_JOBS_REFRESH_INTERVAL_MS = 1000;
|
||||
export const PROGRESS_JOBS_REFRESH_INTERVAL_MS = 2000;
|
||||
|
|
|
@ -24,7 +24,7 @@ import {
|
|||
|
||||
import { i18n } from '@kbn/i18n';
|
||||
import { deleteJobs } from '../utils';
|
||||
import { DELETING_JOBS_REFRESH_INTERVAL_MS } from '../../../../../../common/constants/jobs_list';
|
||||
import { BLOCKED_JOBS_REFRESH_INTERVAL_MS } from '../../../../../../common/constants/jobs_list';
|
||||
import { DeleteSpaceAwareItemCheckModal } from '../../../../components/delete_space_aware_item_check_modal';
|
||||
import type { MlSummaryJob } from '../../../../../../common/types/anomaly_detection_jobs';
|
||||
import { isManagedJob } from '../../../jobs_utils';
|
||||
|
@ -91,7 +91,7 @@ export const DeleteJobModal: FC<Props> = ({ setShowFunction, unsetShowFunction,
|
|||
setTimeout(() => {
|
||||
closeModal();
|
||||
refreshJobs();
|
||||
}, DELETING_JOBS_REFRESH_INTERVAL_MS);
|
||||
}, BLOCKED_JOBS_REFRESH_INTERVAL_MS);
|
||||
}, [jobIds, deleteUserAnnotations, deleteAlertingRules, closeModal, refreshJobs]);
|
||||
|
||||
if (modalVisible === false || jobIds.length === 0) {
|
||||
|
|
|
@ -26,7 +26,11 @@ import { JobsAwaitingNodeWarning } from '../../../../components/jobs_awaiting_no
|
|||
import { SavedObjectsWarning } from '../../../../components/saved_objects_warning';
|
||||
import { UpgradeWarning } from '../../../../components/upgrade';
|
||||
|
||||
import { DELETING_JOBS_REFRESH_INTERVAL_MS } from '../../../../../../common/constants/jobs_list';
|
||||
import {
|
||||
BLOCKED_JOBS_REFRESH_INTERVAL_MS,
|
||||
BLOCKED_JOBS_REFRESH_INTERVAL_SLOW_MS,
|
||||
BLOCKED_JOBS_REFRESH_THRESHOLD_MS,
|
||||
} from '../../../../../../common/constants/jobs_list';
|
||||
import { JobListMlAnomalyAlertFlyout } from '../../../../../alerting/ml_alerting_flyout';
|
||||
import { StopDatafeedsConfirmModal } from '../confirm_modals/stop_datafeeds_confirm_modal';
|
||||
import { CloseJobsConfirmModal } from '../confirm_modals/close_jobs_confirm_modal';
|
||||
|
@ -49,6 +53,7 @@ export class JobsListView extends Component {
|
|||
itemIdToExpandedRowMap: {},
|
||||
filterClauses: [],
|
||||
blockingJobIds: [],
|
||||
blockingJobsFirstFoundMs: null,
|
||||
jobsAwaitingNodeCount: 0,
|
||||
};
|
||||
|
||||
|
@ -350,14 +355,17 @@ export class JobsListView extends Component {
|
|||
});
|
||||
|
||||
this.isDoneRefreshing();
|
||||
if (
|
||||
blockingJobsRefreshTimeout === null &&
|
||||
jobsSummaryList.some((j) => j.blocked !== undefined)
|
||||
) {
|
||||
if (jobsSummaryList.some((j) => j.blocked !== undefined)) {
|
||||
// if there are some jobs in a deleting state, start polling for
|
||||
// deleting jobs so we can update the jobs list once the
|
||||
// deleting tasks are over
|
||||
this.checkBlockingJobTasks(true);
|
||||
if (this.state.blockingJobsFirstFoundMs === null) {
|
||||
// keep a record of when the first blocked job was found
|
||||
this.setState({ blockingJobsFirstFoundMs: Date.now() });
|
||||
}
|
||||
} else {
|
||||
this.setState({ blockingJobsFirstFoundMs: null });
|
||||
}
|
||||
} catch (error) {
|
||||
console.error(error);
|
||||
|
@ -366,7 +374,7 @@ export class JobsListView extends Component {
|
|||
}
|
||||
|
||||
async checkBlockingJobTasks(forceRefresh = false) {
|
||||
if (this._isMounted === false) {
|
||||
if (this._isMounted === false || blockingJobsRefreshTimeout !== null) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -384,14 +392,24 @@ export class JobsListView extends Component {
|
|||
this.refreshJobSummaryList();
|
||||
}
|
||||
|
||||
if (blockingJobIds.length > 0 && blockingJobsRefreshTimeout === null) {
|
||||
if (this.state.blockingJobsFirstFoundMs !== null || blockingJobIds.length > 0) {
|
||||
blockingJobsRefreshTimeout = setTimeout(() => {
|
||||
blockingJobsRefreshTimeout = null;
|
||||
this.checkBlockingJobTasks();
|
||||
}, DELETING_JOBS_REFRESH_INTERVAL_MS);
|
||||
}, this.getBlockedJobsRefreshInterval());
|
||||
}
|
||||
}
|
||||
|
||||
getBlockedJobsRefreshInterval() {
|
||||
const runningTimeMs = Date.now() - this.state.blockingJobsFirstFoundMs;
|
||||
if (runningTimeMs > BLOCKED_JOBS_REFRESH_THRESHOLD_MS) {
|
||||
// if the jobs have been in a blocked state for more than a minute
|
||||
// increase the polling interval
|
||||
return BLOCKED_JOBS_REFRESH_INTERVAL_SLOW_MS;
|
||||
}
|
||||
return BLOCKED_JOBS_REFRESH_INTERVAL_MS;
|
||||
}
|
||||
|
||||
renderJobsListComponents() {
|
||||
const { isRefreshing, loading, jobsSummaryList, jobsAwaitingNodeCount } = this.state;
|
||||
const jobIds = jobsSummaryList.map((j) => j.id);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue