mirror of
https://github.com/elastic/kibana.git
synced 2025-04-23 09:19:04 -04:00
[Detection Engine] Addresses Flakiness in ML FTR tests (#188155)
## Summary

The full chronicle of this endeavor can be found [here](https://github.com/elastic/kibana/pull/182183), but [this comment](https://github.com/elastic/kibana/pull/182183#issuecomment-2221517519) summarizes the identified issue:

> I [finally found](https://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/6516#01909dde-a3e8-4e47-b255-b1ff7cac8f8d/6-2368) the cause of these failures in the response to our "setup modules" request to ML. Attaching here for posterity:
>
> <details>
> <summary>Setup Modules Failure Response</summary>
>
> ```json
> {
>   "jobs": [
>     { "id": "v3_linux_anomalous_network_port_activity", "success": true },
>     {
>       "id": "v3_linux_anomalous_network_activity",
>       "success": false,
>       "error": {
>         "error": {
>           "root_cause": [
>             {
>               "type": "no_shard_available_action_exception",
>               "reason": "[ftr][127.0.0.1:9300][indices:data/read/search[phase/query]]"
>             }
>           ],
>           "type": "search_phase_execution_exception",
>           "reason": "all shards failed",
>           "phase": "query",
>           "grouped": true,
>           "failed_shards": [
>             {
>               "shard": 0,
>               "index": ".ml-anomalies-custom-v3_linux_network_configuration_discovery",
>               "node": "dKzpvp06ScO0OxqHilETEA",
>               "reason": {
>                 "type": "no_shard_available_action_exception",
>                 "reason": "[ftr][127.0.0.1:9300][indices:data/read/search[phase/query]]"
>               }
>             }
>           ]
>         },
>         "status": 503
>       }
>     }
>   ],
>   "datafeeds": [
>     {
>       "id": "datafeed-v3_linux_anomalous_network_port_activity",
>       "success": true,
>       "started": false,
>       "awaitingMlNodeAllocation": false
>     },
>     {
>       "id": "datafeed-v3_linux_anomalous_network_activity",
>       "success": false,
>       "started": false,
>       "awaitingMlNodeAllocation": false,
>       "error": {
>         "error": {
>           "root_cause": [
>             {
>               "type": "resource_not_found_exception",
>               "reason": "No known job with id 'v3_linux_anomalous_network_activity'"
>             }
>           ],
>           "type": "resource_not_found_exception",
>           "reason": "No known job with id 'v3_linux_anomalous_network_activity'"
>         },
>         "status": 404
>       }
>     }
>   ],
>   "kibana": {}
> }
>
> ```
> </details>

This branch, then, fixes said issue by (relatively simply) retrying the failed API call until it succeeds.

### Related Issues

Addresses:
- https://github.com/elastic/kibana/issues/171426
- https://github.com/elastic/kibana/issues/187478
- https://github.com/elastic/kibana/issues/187614
- https://github.com/elastic/kibana/issues/182009

### Checklist

- [x] [Unit or functional tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html) were updated or added to match the most common scenarios
- [x] [Flaky Test Runner](https://ci-stats.kibana.dev/trigger_flaky_test_runner/1) was used on any tests changed
- [x] [ESS Rule Execution FTR x 200](https://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/6528)
- [x] [Serverless Rule Execution FTR x 200](https://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/6529)

### For maintainers

- [x] This was checked for breaking API changes and was [labeled appropriately](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process)
This commit is contained in:
parent
4c0db61e6d
commit
3df635ef4a
3 changed files with 47 additions and 11 deletions
|
@ -40,12 +40,12 @@ import {
|
|||
importFile,
|
||||
} from '../../../../../lists_and_exception_lists/utils';
|
||||
import {
|
||||
executeSetupModuleRequest,
|
||||
forceStartDatafeeds,
|
||||
getAlerts,
|
||||
getPreviewAlerts,
|
||||
previewRule,
|
||||
previewRuleWithExceptionEntries,
|
||||
setupMlModulesWithRetry,
|
||||
} from '../../../../utils';
|
||||
import {
|
||||
createRule,
|
||||
|
@ -86,13 +86,12 @@ export default ({ getService }: FtrProviderContext) => {
|
|||
rule_id: 'ml-rule-id',
|
||||
};
|
||||
|
||||
// FLAKY: https://github.com/elastic/kibana/issues/171426
|
||||
describe.skip('@ess @serverless @serverlessQA Machine learning type rules', () => {
|
||||
describe('@ess @serverless @serverlessQA Machine learning type rules', () => {
|
||||
before(async () => {
|
||||
// Order is critical here: auditbeat data must be loaded before attempting to start the ML job,
|
||||
// as the job looks for certain indices on start
|
||||
await esArchiver.load(auditPath);
|
||||
await executeSetupModuleRequest({ module: siemModule, rspCode: 200, supertest });
|
||||
await setupMlModulesWithRetry({ module: siemModule, supertest, retry });
|
||||
await forceStartDatafeeds({ jobId: mlJobId, rspCode: 200, supertest });
|
||||
await esArchiver.load('x-pack/test/functional/es_archives/security_solution/anomalies');
|
||||
});
|
||||
|
|
|
@ -27,7 +27,6 @@ import { EsArchivePathBuilder } from '../../../../../../es_archive_path_builder'
|
|||
import { FtrProviderContext } from '../../../../../../ftr_provider_context';
|
||||
import {
|
||||
dataGeneratorFactory,
|
||||
executeSetupModuleRequest,
|
||||
forceStartDatafeeds,
|
||||
getAlerts,
|
||||
getOpenAlerts,
|
||||
|
@ -36,6 +35,7 @@ import {
|
|||
previewRule,
|
||||
previewRuleWithExceptionEntries,
|
||||
setAlertStatus,
|
||||
setupMlModulesWithRetry,
|
||||
} from '../../../../utils';
|
||||
import {
|
||||
createRule,
|
||||
|
@ -51,6 +51,7 @@ export default ({ getService }: FtrProviderContext) => {
|
|||
const es = getService('es');
|
||||
const log = getService('log');
|
||||
const config = getService('config');
|
||||
const retry = getService('retry');
|
||||
|
||||
const isServerless = config.get('serverless');
|
||||
const dataPathBuilder = new EsArchivePathBuilder(isServerless);
|
||||
|
@ -93,7 +94,7 @@ export default ({ getService }: FtrProviderContext) => {
|
|||
// Order is critical here: auditbeat data must be loaded before attempting to start the ML job,
|
||||
// as the job looks for certain indices on start
|
||||
await esArchiver.load(auditbeatArchivePath);
|
||||
await executeSetupModuleRequest({ module: mlModuleName, rspCode: 200, supertest });
|
||||
await setupMlModulesWithRetry({ module: mlModuleName, retry, supertest });
|
||||
await forceStartDatafeeds({ jobId: mlJobId, rspCode: 200, supertest });
|
||||
await esArchiver.load('x-pack/test/functional/es_archives/security_solution/anomalies');
|
||||
await deleteAllAnomalies(log, es);
|
||||
|
@ -112,8 +113,7 @@ export default ({ getService }: FtrProviderContext) => {
|
|||
await deleteAllAnomalies(log, es);
|
||||
});
|
||||
|
||||
// FLAKY: https://github.com/elastic/kibana/issues/187478
|
||||
describe.skip('with per-execution suppression duration', () => {
|
||||
describe('with per-execution suppression duration', () => {
|
||||
beforeEach(() => {
|
||||
ruleProps = {
|
||||
...baseRuleProps,
|
||||
|
@ -245,8 +245,7 @@ export default ({ getService }: FtrProviderContext) => {
|
|||
});
|
||||
});
|
||||
|
||||
// FLAKY: https://github.com/elastic/kibana/issues/187614
|
||||
describe.skip('with interval suppression duration', () => {
|
||||
describe('with interval suppression duration', () => {
|
||||
beforeEach(() => {
|
||||
ruleProps = {
|
||||
...baseRuleProps,
|
||||
|
|
|
@ -6,9 +6,18 @@
|
|||
*/
|
||||
|
||||
import type SuperTest from 'supertest';
|
||||
import { RetryService } from '@kbn/ftr-common-functional-services';
|
||||
import { ML_GROUP_ID } from '@kbn/security-solution-plugin/common/constants';
|
||||
import { getCommonRequestHeader } from '../../../../../functional/services/ml/common_api';
|
||||
|
||||
interface ModuleJob {
|
||||
id: string;
|
||||
success: boolean;
|
||||
error?: {
|
||||
status: number;
|
||||
};
|
||||
}
|
||||
|
||||
export const executeSetupModuleRequest = async ({
|
||||
module,
|
||||
rspCode,
|
||||
|
@ -17,7 +26,7 @@ export const executeSetupModuleRequest = async ({
|
|||
module: string;
|
||||
rspCode: number;
|
||||
supertest: SuperTest.Agent;
|
||||
}) => {
|
||||
}): Promise<{ jobs: ModuleJob[] }> => {
|
||||
const { body } = await supertest
|
||||
.post(`/internal/ml/modules/setup/${module}`)
|
||||
.set(getCommonRequestHeader('1'))
|
||||
|
@ -34,6 +43,35 @@ export const executeSetupModuleRequest = async ({
|
|||
return body;
|
||||
};
|
||||
|
||||
export const setupMlModulesWithRetry = async ({
|
||||
module,
|
||||
retry,
|
||||
supertest,
|
||||
}: {
|
||||
module: string;
|
||||
retry: RetryService;
|
||||
supertest: SuperTest.Agent;
|
||||
}) =>
|
||||
retry.try(async () => {
|
||||
const response = await executeSetupModuleRequest({
|
||||
module,
|
||||
rspCode: 200,
|
||||
supertest,
|
||||
});
|
||||
|
||||
const allJobsSucceeded = response?.jobs.every((job) => {
|
||||
return job.success || (job.error?.status && job.error.status < 500);
|
||||
});
|
||||
|
||||
if (!allJobsSucceeded) {
|
||||
throw new Error(
|
||||
`Expected all jobs to set up successfully, but got ${JSON.stringify(response)}`
|
||||
);
|
||||
}
|
||||
|
||||
return response;
|
||||
});
|
||||
|
||||
export const forceStartDatafeeds = async ({
|
||||
jobId,
|
||||
rspCode,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue