Make telemetry task use a schedule instead of scheduling explicitly for midnight (#153380)

Fixes https://github.com/elastic/kibana/issues/140973
Fixes https://github.com/elastic/kibana-team/issues/563

In this PR, I'm fixing flaky tests that failed because of extra telemetry
runs whenever CI ran them near midnight UTC. The assertion expected two
runs, but a third run would sometimes happen when the test ran near
midnight, which is when the telemetry task was scheduled to run again.

To fix this, I've moved away from the midnight scheduling given
telemetry only needs to be reported daily, and moved the task to use a
`schedule` within task manager to make the task run daily (+24hrs from
the previous run). This also improves error handling given task manager
will now know it's a recurring task and recurring tasks never get marked
as `failed`.

The following verification steps can be done using this query in Dev
Tools:

```
GET .kibana_task_manager/_search
{
  "query": {
    "term": {
      "task.taskType": "actions_telemetry"
    }
  }
}
```

## To verify existing tasks migrating to a schedule
1. Using `main`, set up a fresh Kibana and ES instance
2. Keep Elasticsearch running but shut down Kibana after setup is
complete
3. Switch from `main` to this PR
4. Add `await taskManager.runSoon(TASK_ID);` after the `ensureScheduled`
call within `x-pack/plugins/actions/server/usage/task.ts`.
5. Start up Kibana
6. Go to Dev Tools and pull the task information to see a new `schedule`
attribute added

## To verify fresh installs
1. Using this PR code, set up a fresh Kibana and ES instance
2. Go to Dev Tools and pull the task information to see a new `schedule`
attribute added

Flaky test runner:
https://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/2017

---------

Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com>
This commit is contained in:
Mike Côté 2023-03-23 08:59:31 -04:00 committed by GitHub
parent bbe3d52652
commit 6a169325a5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 17 additions and 15 deletions

View file

@ -6,11 +6,11 @@
*/
import { Logger, CoreSetup } from '@kbn/core/server';
import moment from 'moment';
import {
RunContext,
TaskManagerSetupContract,
TaskManagerStartContract,
IntervalSchedule,
} from '@kbn/task-manager-plugin/server';
import { PreConfiguredAction } from '../types';
import { getTotalCount, getInUseTotalCount, getExecutionsPerDayCount } from './actions_telemetry';
@ -18,6 +18,7 @@ import { getTotalCount, getInUseTotalCount, getExecutionsPerDayCount } from './a
export const TELEMETRY_TASK_TYPE = 'actions_telemetry';
export const TASK_ID = `Actions-${TELEMETRY_TASK_TYPE}`;
export const SCHEDULE: IntervalSchedule = { interval: '1d' };
export function initializeActionsTelemetry(
logger: Logger,
@ -71,6 +72,7 @@ async function scheduleTasks(logger: Logger, taskManager: TaskManagerStartContra
taskType: TELEMETRY_TASK_TYPE,
state: {},
params: {},
schedule: SCHEDULE,
});
} catch (e) {
logger.debug(`Error scheduling task, received ${e.message}`);
@ -133,14 +135,12 @@ export function telemetryTaskRunner(
count_connector_types_by_action_run_outcome_per_day:
totalExecutionsPerDay.countRunOutcomeByConnectorType,
},
runAt: getNextMidnight(),
// Useful for setting a schedule for the old tasks that don't have one
// or to update the schedule if ever the frequency changes in code
schedule: SCHEDULE,
};
});
},
};
};
}
function getNextMidnight() {
return moment().add(1, 'd').startOf('d').toDate();
}

View file

@ -6,11 +6,11 @@
*/
import { Logger, CoreSetup } from '@kbn/core/server';
import moment from 'moment';
import {
RunContext,
TaskManagerSetupContract,
TaskManagerStartContract,
IntervalSchedule,
} from '@kbn/task-manager-plugin/server';
import { getFailedAndUnrecognizedTasksPerDay } from './lib/get_telemetry_from_task_manager';
@ -23,6 +23,7 @@ import {
export const TELEMETRY_TASK_TYPE = 'alerting_telemetry';
export const TASK_ID = `Alerting-${TELEMETRY_TASK_TYPE}`;
export const SCHEDULE: IntervalSchedule = { interval: '1d' };
export function initializeAlertingTelemetry(
logger: Logger,
@ -69,6 +70,7 @@ async function scheduleTasks(logger: Logger, taskManager: TaskManagerStartContra
taskType: TELEMETRY_TASK_TYPE,
state: {},
params: {},
schedule: SCHEDULE,
});
} catch (e) {
logger.debug(`Error scheduling task, received ${e.message}`);
@ -189,7 +191,9 @@ export function telemetryTaskRunner(
percentile_num_alerts_by_type_per_day:
dailyExecutionCounts.alertsPercentilesByType,
},
runAt: getNextMidnight(),
// Useful for setting a schedule for the old tasks that don't have one
// or to update the schedule if ever the frequency changes in code
schedule: SCHEDULE,
};
}
)
@ -197,14 +201,12 @@ export function telemetryTaskRunner(
logger.warn(`Error executing alerting telemetry task: ${errMsg}`);
return {
state: {},
runAt: getNextMidnight(),
// Useful for setting a schedule for the old tasks that don't have one
// or to update the schedule if ever the frequency changes in code
schedule: SCHEDULE,
};
});
},
};
};
}
function getNextMidnight() {
return moment().add(1, 'd').startOf('d').toDate();
}

View file

@ -18,6 +18,7 @@ export type {
EphemeralTask,
TaskRunCreatorFunction,
RunContext,
IntervalSchedule,
} from './task';
export { TaskStatus } from './task';

View file

@ -20,8 +20,7 @@ export default function createAlertingAndActionsTelemetryTests({ getService }: F
const esTestIndexTool = new ESTestIndexTool(es, retry);
const supertestWithoutAuth = getService('supertestWithoutAuth');
// FLAKY: https://github.com/elastic/kibana/issues/140973
describe.skip('telemetry', () => {
describe('telemetry', () => {
const alwaysFiringRuleId: { [key: string]: string } = {};
beforeEach(async () => {