Mirror of https://github.com/elastic/kibana.git
# Backport

This will backport the following commits from `main` to `8.15`:

- [[Detection Engine] Addresses Flakiness in ML FTR tests (#188155)](https://github.com/elastic/kibana/pull/188155)

### Questions?

Please refer to the [Backport tool documentation](https://github.com/sqren/backport)

---

[Detection Engine] Addresses Flakiness in ML FTR tests (#188155)

## Summary

The full chronicle of this endeavor can be found [here](https://github.com/elastic/kibana/pull/182183), but [this comment](https://github.com/elastic/kibana/pull/182183#issuecomment-2221517519) summarizes the identified issue:

> I [finally found](https://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/6516#01909dde-a3e8-4e47-b255-b1ff7cac8f8d/6-2368) the cause of these failures in the response to our "setup modules" request to ML. Attaching here for posterity:
>
> <details>
> <summary>Setup Modules Failure Response</summary>
>
> ```json
> {
>   "jobs": [
>     { "id": "v3_linux_anomalous_network_port_activity", "success": true },
>     {
>       "id": "v3_linux_anomalous_network_activity",
>       "success": false,
>       "error": {
>         "error": {
>           "root_cause": [
>             {
>               "type": "no_shard_available_action_exception",
>               "reason": "[ftr][127.0.0.1:9300][indices:data/read/search[phase/query]]"
>             }
>           ],
>           "type": "search_phase_execution_exception",
>           "reason": "all shards failed",
>           "phase": "query",
>           "grouped": true,
>           "failed_shards": [
>             {
>               "shard": 0,
>               "index": ".ml-anomalies-custom-v3_linux_network_configuration_discovery",
>               "node": "dKzpvp06ScO0OxqHilETEA",
>               "reason": {
>                 "type": "no_shard_available_action_exception",
>                 "reason": "[ftr][127.0.0.1:9300][indices:data/read/search[phase/query]]"
>               }
>             }
>           ]
>         },
>         "status": 503
>       }
>     }
>   ],
>   "datafeeds": [
>     {
>       "id": "datafeed-v3_linux_anomalous_network_port_activity",
>       "success": true,
>       "started": false,
>       "awaitingMlNodeAllocation": false
>     },
>     {
>       "id": "datafeed-v3_linux_anomalous_network_activity",
>       "success": false,
>       "started": false,
>       "awaitingMlNodeAllocation": false,
>       "error": {
>         "error": {
>           "root_cause": [
>             {
>               "type": "resource_not_found_exception",
>               "reason": "No known job with id 'v3_linux_anomalous_network_activity'"
>             }
>           ],
>           "type": "resource_not_found_exception",
>           "reason": "No known job with id 'v3_linux_anomalous_network_activity'"
>         },
>         "status": 404
>       }
>     }
>   ],
>   "kibana": {}
> }
> ```
>
> </details>

This branch, then, fixes said issue by (relatively simply) retrying the failed API call until it succeeds.

### Related Issues

Addresses:

- https://github.com/elastic/kibana/issues/171426
- https://github.com/elastic/kibana/issues/187478
- https://github.com/elastic/kibana/issues/187614
- https://github.com/elastic/kibana/issues/182009

### Checklist

- [x] [Unit or functional tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html) were updated or added to match the most common scenarios
- [x] [Flaky Test Runner](https://ci-stats.kibana.dev/trigger_flaky_test_runner/1) was used on any tests changed
- [x] [ESS Rule Execution FTR x 200](https://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/6528)
- [x] [Serverless Rule Execution FTR x 200](https://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/6529)

### For maintainers

- [x] This was checked for breaking API changes and was [labeled appropriately](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process)
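The diff below adds a `setupMlModulesWithRetry` test utility and switches the ML FTR suites over to it. As a rough illustration of the idea — not the Kibana FTR helper itself, which uses the FTR `RetryService` and appears in full in the diff — a standalone retry loop over a setup-modules call might look like the sketch below; the `setupModulesUntilStable` name and the `maxAttempts` parameter are hypothetical.

```ts
// Minimal sketch of the retry approach (hypothetical helper, not Kibana's
// FTR utility): re-run module setup while any job fails with a 5xx error.
interface ModuleJobResult {
  id: string;
  success: boolean;
  error?: { status: number };
}

const setupModulesUntilStable = async (
  setup: () => Promise<{ jobs: ModuleJobResult[] }>,
  maxAttempts = 5
): Promise<{ jobs: ModuleJobResult[] }> => {
  let lastResponse: { jobs: ModuleJobResult[] } | undefined;

  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    lastResponse = await setup();

    // Treat 5xx job errors (e.g. no_shard_available_action_exception -> 503)
    // as transient and retry; 4xx errors are considered permanent.
    const stable = lastResponse.jobs.every(
      (job) => job.success || (job.error?.status !== undefined && job.error.status < 500)
    );
    if (stable) {
      return lastResponse;
    }
  }

  throw new Error(`ML module setup did not stabilize: ${JSON.stringify(lastResponse)}`);
};
```

The actual helper in the diff delegates the looping and timeout policy to the FTR retry service (`retry.try`) rather than a fixed attempt count, but the success criterion is the same: a job counts as settled when it either succeeds or fails with a non-retryable 4xx status.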
Parent: bc5b5da7fd
Commit: 38677570cb

3 changed files with 46 additions and 9 deletions
@@ -40,12 +40,12 @@ import {
   importFile,
 } from '../../../../../lists_and_exception_lists/utils';
 import {
-  executeSetupModuleRequest,
   forceStartDatafeeds,
   getAlerts,
   getPreviewAlerts,
   previewRule,
   previewRuleWithExceptionEntries,
+  setupMlModulesWithRetry,
 } from '../../../../utils';
 import {
   createRule,
@@ -86,13 +86,12 @@ export default ({ getService }: FtrProviderContext) => {
     rule_id: 'ml-rule-id',
   };

-  // FLAKY: https://github.com/elastic/kibana/issues/171426
-  describe.skip('@ess @serverless @serverlessQA Machine learning type rules', () => {
+  describe('@ess @serverless @serverlessQA Machine learning type rules', () => {
     before(async () => {
       // Order is critical here: auditbeat data must be loaded before attempting to start the ML job,
       // as the job looks for certain indices on start
       await esArchiver.load(auditPath);
-      await executeSetupModuleRequest({ module: siemModule, rspCode: 200, supertest });
+      await setupMlModulesWithRetry({ module: siemModule, supertest, retry });
       await forceStartDatafeeds({ jobId: mlJobId, rspCode: 200, supertest });
       await esArchiver.load('x-pack/test/functional/es_archives/security_solution/anomalies');
     });
@@ -27,7 +27,6 @@ import { EsArchivePathBuilder } from '../../../../../../es_archive_path_builder';
 import { FtrProviderContext } from '../../../../../../ftr_provider_context';
 import {
   dataGeneratorFactory,
-  executeSetupModuleRequest,
   forceStartDatafeeds,
   getAlerts,
   getOpenAlerts,
@@ -36,6 +35,7 @@ import {
   previewRule,
   previewRuleWithExceptionEntries,
   setAlertStatus,
+  setupMlModulesWithRetry,
 } from '../../../../utils';
 import {
   createRule,
@@ -51,6 +51,7 @@ export default ({ getService }: FtrProviderContext) => {
   const es = getService('es');
   const log = getService('log');
   const config = getService('config');
+  const retry = getService('retry');

   const isServerless = config.get('serverless');
   const dataPathBuilder = new EsArchivePathBuilder(isServerless);
@@ -87,14 +88,13 @@ export default ({ getService }: FtrProviderContext) => {
   // The tests described in this file rely on the
   // 'alertSuppressionForMachineLearningRuleEnabled' feature flag, and are thus
   // skipped in MKI
-  // Failing: See https://github.com/elastic/kibana/issues/187478
-  describe.skip('@ess @serverless @skipInServerlessMKI Machine Learning Detection Rule - Alert Suppression', () => {
+  describe('@ess @serverless @skipInServerlessMKI Machine Learning Detection Rule - Alert Suppression', () => {
     describe('with an active ML Job', () => {
       before(async () => {
         // Order is critical here: auditbeat data must be loaded before attempting to start the ML job,
         // as the job looks for certain indices on start
         await esArchiver.load(auditbeatArchivePath);
-        await executeSetupModuleRequest({ module: mlModuleName, rspCode: 200, supertest });
+        await setupMlModulesWithRetry({ module: mlModuleName, retry, supertest });
         await forceStartDatafeeds({ jobId: mlJobId, rspCode: 200, supertest });
         await esArchiver.load('x-pack/test/functional/es_archives/security_solution/anomalies');
         await deleteAllAnomalies(log, es);
@@ -6,9 +6,18 @@
  */

 import type SuperTest from 'supertest';
+import { RetryService } from '@kbn/ftr-common-functional-services';
 import { ML_GROUP_ID } from '@kbn/security-solution-plugin/common/constants';
 import { getCommonRequestHeader } from '../../../../../functional/services/ml/common_api';

+interface ModuleJob {
+  id: string;
+  success: boolean;
+  error?: {
+    status: number;
+  };
+}
+
 export const executeSetupModuleRequest = async ({
   module,
   rspCode,
@@ -17,7 +26,7 @@ export const executeSetupModuleRequest = async ({
   module: string;
   rspCode: number;
   supertest: SuperTest.Agent;
-}) => {
+}): Promise<{ jobs: ModuleJob[] }> => {
   const { body } = await supertest
     .post(`/internal/ml/modules/setup/${module}`)
     .set(getCommonRequestHeader('1'))
@@ -34,6 +43,35 @@ export const executeSetupModuleRequest = async ({
   return body;
 };

+export const setupMlModulesWithRetry = async ({
+  module,
+  retry,
+  supertest,
+}: {
+  module: string;
+  retry: RetryService;
+  supertest: SuperTest.Agent;
+}) =>
+  retry.try(async () => {
+    const response = await executeSetupModuleRequest({
+      module,
+      rspCode: 200,
+      supertest,
+    });
+
+    const allJobsSucceeded = response?.jobs.every((job) => {
+      return job.success || (job.error?.status && job.error.status < 500);
+    });
+
+    if (!allJobsSucceeded) {
+      throw new Error(
+        `Expected all jobs to set up successfully, but got ${JSON.stringify(response)}`
+      );
+    }
+
+    return response;
+  });
+
 export const forceStartDatafeeds = async ({
   jobId,
   rspCode,
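For context, the call sites added in the two test files above invoke the new helper from a `before()` hook. A condensed usage sketch follows; the module name, job id, and surrounding suite scaffolding are hypothetical, but the helper calls mirror the hunks above.

```ts
// Condensed usage sketch; module name, job id, and scaffolding are
// hypothetical, while the helper calls mirror the diff above.
import { FtrProviderContext } from '../../../../../../ftr_provider_context';
import { forceStartDatafeeds, setupMlModulesWithRetry } from '../../../../utils';

export default ({ getService }: FtrProviderContext) => {
  const supertest = getService('supertest');
  const retry = getService('retry');

  describe('ML rule suite', () => {
    before(async () => {
      // Source data must already be loaded: the ML job looks for certain indices on start.
      // Retry module setup so a transient 503 (no_shard_available) does not fail the suite.
      await setupMlModulesWithRetry({ module: 'example_ml_module', retry, supertest });
      await forceStartDatafeeds({ jobId: 'example_ml_job_id', rspCode: 200, supertest });
    });
  });
};
```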