[8.12] [Security Solution] Add retryIfConflict util for 409 conflicts in Integration tests (#174185) (#174762)

# Backport

This will backport the following commits from `main` to `8.12`:
- [[Security Solution] Add `retryIfConflict` util for `409` conflicts in
Integration tests
(#174185)](https://github.com/elastic/kibana/pull/174185)

<!--- Backport version: 8.9.8 -->

### Questions?
Please refer to the [Backport tool
documentation](https://github.com/sqren/backport)

<!--BACKPORT [{"author":{"name":"Juan Pablo
Djeredjian","email":"jpdjeredjian@gmail.com"},"sourceCommit":{"committedDate":"2024-01-11T12:39:45Z","message":"[Security
Solution] Add `retryIfConflict` util for `409` conflicts in Integration
tests (#174185)\n\n## Summary\r\n\r\nFixes:
https://github.com/elastic/kibana/issues/171428\r\n\r\n**NOTE: the test
where this was reported wasn't skipped, so this PR does\r\nnot unskip
any tests.** However, the Flaky Test Runs help us determine\r\nthat the
issue is no longer reproducible.\r\n\r\nThe
`deleteAllPrebuiltRuleAssets` utility reported a `409
Conflict`,\r\npresumably from `security-rule` assets that were attempted
to be deleted\r\nwhile they were being updated by a parallel
process.\r\n\r\nThis PR wraps the `es.deleteByQuery` calls in the
utils\r\n`deleteAllPrebuiltRuleAssets` and `deleteAllTimelines` with a
new\r\n`retryIfConflict` helper, that will retry the operation if the
ES\r\nrequest fails with a `409`.\r\n\r\n## Flaky test
run\r\n\r\n`bundled_prebuilt_rules_package` - **ESS** and
**Serverless**:\r\nhttps://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/4790\r\n\r\n`large_prebuilt_rules_package`
- **ESS** and
**Serverless**:\r\nhttps://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/4791\r\n\r\n`update_prebuilt_rules_package`
- **ESS** and
**Serverless**:\r\nhttps://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/4792\r\n\r\n`management`
- **ESS** and
**Serverless**:\r\nhttps://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/4793\r\n\r\n###
For maintainers\r\n\r\n- [ ] This was checked for breaking API changes
and was
[labeled\r\nappropriately](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process)","sha":"b8c7306d241807b68bedbd477dcec232e203f6ad","branchLabelMapping":{"^v8.13.0$":"main","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["test","release_note:skip","Team:Detections
and Resp","Team: SecuritySolution","Team:Detection Rule
Management","Feature:Prebuilt Detection
Rules","v8.12.0","v8.12.1","v8.13.0"],"number":174185,"url":"https://github.com/elastic/kibana/pull/174185","mergeCommit":{"message":"[Security
Solution] Add `retryIfConflict` util for `409` conflicts in Integration
tests (#174185)\n\n## Summary\r\n\r\nFixes:
https://github.com/elastic/kibana/issues/171428\r\n\r\n**NOTE: the test
where this was reported wasn't skipped, so this PR does\r\nnot unskip
any tests.** However, the Flaky Test Runs help us determine\r\nthat the
issue is no longer reproducible.\r\n\r\nThe
`deleteAllPrebuiltRuleAssets` utility reported a `409
Conflict`,\r\npresumably from `security-rule` assets that were attempted
to be deleted\r\nwhile they were being updated by a parallel
process.\r\n\r\nThis PR wraps the `es.deleteByQuery` calls in the
utils\r\n`deleteAllPrebuiltRuleAssets` and `deleteAllTimelines` with a
new\r\n`retryIfConflict` helper, that will retry the operation if the
ES\r\nrequest fails with a `409`.\r\n\r\n## Flaky test
run\r\n\r\n`bundled_prebuilt_rules_package` - **ESS** and
**Serverless**:\r\nhttps://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/4790\r\n\r\n`large_prebuilt_rules_package`
- **ESS** and
**Serverless**:\r\nhttps://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/4791\r\n\r\n`update_prebuilt_rules_package`
- **ESS** and
**Serverless**:\r\nhttps://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/4792\r\n\r\n`management`
- **ESS** and
**Serverless**:\r\nhttps://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/4793\r\n\r\n###
For maintainers\r\n\r\n- [ ] This was checked for breaking API changes
and was
[labeled\r\nappropriately](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process)","sha":"b8c7306d241807b68bedbd477dcec232e203f6ad"}},"sourceBranch":"main","suggestedTargetBranches":["8.12"],"targetPullRequestStates":[{"branch":"8.12","label":"v8.12.0","labelRegex":"^v(\\d+).(\\d+).\\d+$","isSourceBranch":false,"state":"NOT_CREATED"},{"branch":"main","label":"v8.13.0","labelRegex":"^v8.13.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/174185","number":174185,"mergeCommit":{"message":"[Security
Solution] Add `retryIfConflict` util for `409` conflicts in Integration
tests (#174185)\n\n## Summary\r\n\r\nFixes:
https://github.com/elastic/kibana/issues/171428\r\n\r\n**NOTE: the test
where this was reported wasn't skipped, so this PR does\r\nnot unskip
any tests.** However, the Flaky Test Runs help us determine\r\nthat the
issue is no longer reproducible.\r\n\r\nThe
`deleteAllPrebuiltRuleAssets` utility reported a `409
Conflict`,\r\npresumably from `security-rule` assets that were attempted
to be deleted\r\nwhile they were being updated by a parallel
process.\r\n\r\nThis PR wraps the `es.deleteByQuery` calls in the
utils\r\n`deleteAllPrebuiltRuleAssets` and `deleteAllTimelines` with a
new\r\n`retryIfConflict` helper, that will retry the operation if the
ES\r\nrequest fails with a `409`.\r\n\r\n## Flaky test
run\r\n\r\n`bundled_prebuilt_rules_package` - **ESS** and
**Serverless**:\r\nhttps://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/4790\r\n\r\n`large_prebuilt_rules_package`
- **ESS** and
**Serverless**:\r\nhttps://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/4791\r\n\r\n`update_prebuilt_rules_package`
- **ESS** and
**Serverless**:\r\nhttps://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/4792\r\n\r\n`management`
- **ESS** and
**Serverless**:\r\nhttps://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/4793\r\n\r\n###
For maintainers\r\n\r\n- [ ] This was checked for breaking API changes
and was
[labeled\r\nappropriately](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process)","sha":"b8c7306d241807b68bedbd477dcec232e203f6ad"}}]}]
BACKPORT-->
This commit is contained in:
Juan Pablo Djeredjian 2024-01-15 11:42:12 +01:00 committed by GitHub
parent d1d01cf2d7
commit 24733e59d2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 105 additions and 41 deletions

View file

@ -87,7 +87,8 @@
"prebuilt_rules_update_prebuilt_rules_package:runner:serverless": "npm run run-tests:dr:default prebuilt_rules/update_prebuilt_rules_package serverless serverlessEnv",
"prebuilt_rules_update_prebuilt_rules_package:qa:serverless": "npm run run-tests:dr:default prebuilt_rules/update_prebuilt_rules_package serverless qaEnv",
"prebuilt_rules_update_prebuilt_rules_package:server:ess": "npm run initialize-server:dr:default prebuilt_rules/update_prebuilt_rules_package ess",
"prebuilt_rules_update_prebuilt_rules_package:runner:ess": "npm run run-tests:dr:default prebuilt_rules/update_prebuilt_rules_package ess essEnvs",
"prebuilt_rules_update_prebuilt_rules_package:runner:ess": "npm run run-tests:dr:default prebuilt_rules/update_prebuilt_rules_package ess essEnv",
"rule_execution_logic:server:serverless": "npm run initialize-server:dr:default rule_execution_logic serverless",
"rule_execution_logic:runner:serverless": "npm run run-tests:dr:default rule_execution_logic serverless serverlessEnv",
"rule_execution_logic:qa:serverless": "npm run run-tests:dr:default rule_execution_logic serverless qaEnv",

View file

@ -32,7 +32,7 @@ export default ({ getService }: FtrProviderContext): void => {
describe('@ess @serverless @skipInQA install_bundled_prebuilt_rules', () => {
beforeEach(async () => {
await deleteAllRules(supertest, log);
await deleteAllPrebuiltRuleAssets(es);
await deleteAllPrebuiltRuleAssets(es, log);
});
it('should list `security_detection_engine` as a bundled fleet package in the `fleet_package.json` file', async () => {

View file

@ -34,7 +34,7 @@ export default ({ getService }: FtrProviderContext): void => {
describe('@ess @serverless @skipInQA prerelease_packages', () => {
beforeEach(async () => {
await deleteAllRules(supertest, log);
await deleteAllPrebuiltRuleAssets(es);
await deleteAllPrebuiltRuleAssets(es, log);
await deletePrebuiltRulesFleetPackage(supertest);
});

View file

@ -21,12 +21,12 @@ export default ({ getService }: FtrProviderContext): void => {
describe('@ess @serverless @skipInQA install_large_prebuilt_rules_package', () => {
beforeEach(async () => {
await deleteAllRules(supertest, log);
await deleteAllPrebuiltRuleAssets(es);
await deleteAllPrebuiltRuleAssets(es, log);
});
afterEach(async () => {
await deleteAllRules(supertest, log);
await deleteAllPrebuiltRuleAssets(es);
await deleteAllPrebuiltRuleAssets(es, log);
});
it('should install a package containing 15000 prebuilt rules without crashing', async () => {

View file

@ -26,8 +26,8 @@ export default ({ getService }: FtrProviderContext): void => {
beforeEach(async () => {
await deletePrebuiltRulesFleetPackage(supertest);
await deleteAllRules(supertest, log);
await deleteAllTimelines(es);
await deleteAllPrebuiltRuleAssets(es);
await deleteAllTimelines(es, log);
await deleteAllPrebuiltRuleAssets(es, log);
});
/**

View file

@ -31,7 +31,7 @@ export default ({ getService }: FtrProviderContext): void => {
describe('@ess @serverless @skipInQA Prebuilt Rules status', () => {
describe('get_prebuilt_rules_status', () => {
beforeEach(async () => {
await deleteAllPrebuiltRuleAssets(es);
await deleteAllPrebuiltRuleAssets(es, log);
await deleteAllRules(supertest, log);
});
@ -110,7 +110,7 @@ export default ({ getService }: FtrProviderContext): void => {
await installPrebuiltRules(es, supertest);
// Clear previous rule assets
await deleteAllPrebuiltRuleAssets(es);
await deleteAllPrebuiltRuleAssets(es, log);
// Increment the version of one of the installed rules and create the new rule assets
ruleAssetSavedObjects[0]['security-rule'].version += 1;
await createPrebuiltRuleAssetSavedObjects(es, ruleAssetSavedObjects);
@ -130,7 +130,7 @@ export default ({ getService }: FtrProviderContext): void => {
await installPrebuiltRules(es, supertest);
// Clear previous rule assets
await deleteAllPrebuiltRuleAssets(es);
await deleteAllPrebuiltRuleAssets(es, log);
// Increment the version of one of the installed rules and create the new rule assets
ruleAssetSavedObjects[0]['security-rule'].version += 1;
await createPrebuiltRuleAssetSavedObjects(es, ruleAssetSavedObjects);
@ -152,7 +152,7 @@ export default ({ getService }: FtrProviderContext): void => {
await installPrebuiltRules(es, supertest);
// Clear previous rule assets
await deleteAllPrebuiltRuleAssets(es);
await deleteAllPrebuiltRuleAssets(es, log);
// Recreate the rules without bumping any versions
await createPrebuiltRuleAssetSavedObjects(es, ruleAssetSavedObjects);
@ -238,7 +238,7 @@ export default ({ getService }: FtrProviderContext): void => {
await installPrebuiltRules(es, supertest);
// Delete the previous versions of rule assets
await deleteAllPrebuiltRuleAssets(es);
await deleteAllPrebuiltRuleAssets(es, log);
// Add a new rule version
await createHistoricalPrebuiltRuleAssetSavedObjects(es, [
@ -261,7 +261,7 @@ export default ({ getService }: FtrProviderContext): void => {
await installPrebuiltRules(es, supertest);
// Delete the previous versions of rule assets
await deleteAllPrebuiltRuleAssets(es);
await deleteAllPrebuiltRuleAssets(es, log);
// Add a new rule version
await createHistoricalPrebuiltRuleAssetSavedObjects(es, [
@ -286,7 +286,7 @@ export default ({ getService }: FtrProviderContext): void => {
describe('get_prebuilt_rules_status - legacy', () => {
beforeEach(async () => {
await deleteAllPrebuiltRuleAssets(es);
await deleteAllPrebuiltRuleAssets(es, log);
await deleteAllRules(supertest, log);
});
@ -367,7 +367,7 @@ export default ({ getService }: FtrProviderContext): void => {
await installPrebuiltRulesAndTimelines(es, supertest);
// Clear previous rule assets
await deleteAllPrebuiltRuleAssets(es);
await deleteAllPrebuiltRuleAssets(es, log);
// Increment the version of one of the installed rules and create the new rule assets
ruleAssetSavedObjects[0]['security-rule'].version += 1;
await createPrebuiltRuleAssetSavedObjects(es, ruleAssetSavedObjects);
@ -387,7 +387,7 @@ export default ({ getService }: FtrProviderContext): void => {
await installPrebuiltRulesAndTimelines(es, supertest);
// Clear previous rule assets
await deleteAllPrebuiltRuleAssets(es);
await deleteAllPrebuiltRuleAssets(es, log);
// Recreate the rules without bumping any versions
await createPrebuiltRuleAssetSavedObjects(es, ruleAssetSavedObjects);
@ -473,7 +473,7 @@ export default ({ getService }: FtrProviderContext): void => {
await installPrebuiltRulesAndTimelines(es, supertest);
// Delete the previous versions of rule assets
await deleteAllPrebuiltRuleAssets(es);
await deleteAllPrebuiltRuleAssets(es, log);
// Add a new rule version
await createHistoricalPrebuiltRuleAssetSavedObjects(es, [

View file

@ -16,10 +16,11 @@ import {
export default ({ getService }: FtrProviderContext): void => {
const supertest = getService('supertest');
const es = getService('es');
const log = getService('log');
describe('@ess @serverless @skipInQA get_prebuilt_timelines_status', () => {
beforeEach(async () => {
await deleteAllTimelines(es);
await deleteAllTimelines(es, log);
});
it('should return the number of timeline templates available to install', async () => {

View file

@ -30,8 +30,8 @@ export default ({ getService }: FtrProviderContext): void => {
describe('@ess @serverless @skipInQA install and upgrade prebuilt rules with mock rule assets', () => {
beforeEach(async () => {
await deleteAllRules(supertest, log);
await deleteAllTimelines(es);
await deleteAllPrebuiltRuleAssets(es);
await deleteAllTimelines(es, log);
await deleteAllPrebuiltRuleAssets(es, log);
});
describe(`rule package without historical versions`, () => {
@ -96,7 +96,7 @@ export default ({ getService }: FtrProviderContext): void => {
await installPrebuiltRulesAndTimelines(es, supertest);
// Clear previous rule assets
await deleteAllPrebuiltRuleAssets(es);
await deleteAllPrebuiltRuleAssets(es, log);
// Increment the version of one of the installed rules and create the new rule assets
ruleAssetSavedObjects[0]['security-rule'].version += 1;
await createPrebuiltRuleAssetSavedObjects(es, ruleAssetSavedObjects);
@ -177,7 +177,7 @@ export default ({ getService }: FtrProviderContext): void => {
await installPrebuiltRules(es, supertest);
// Clear previous rule assets
await deleteAllPrebuiltRuleAssets(es);
await deleteAllPrebuiltRuleAssets(es, log);
// Increment the version of one of the installed rules and create the new rule assets
ruleAssetSavedObjects[0]['security-rule'].version += 1;
await createPrebuiltRuleAssetSavedObjects(es, ruleAssetSavedObjects);
@ -315,7 +315,7 @@ export default ({ getService }: FtrProviderContext): void => {
await installPrebuiltRulesAndTimelines(es, supertest);
// Clear previous rule assets
await deleteAllPrebuiltRuleAssets(es);
await deleteAllPrebuiltRuleAssets(es, log);
// Add a new rule version
await createHistoricalPrebuiltRuleAssetSavedObjects(es, [
@ -423,7 +423,7 @@ export default ({ getService }: FtrProviderContext): void => {
await installPrebuiltRules(es, supertest);
// Clear previous rule assets
await deleteAllPrebuiltRuleAssets(es);
await deleteAllPrebuiltRuleAssets(es, log);
// Add a new rule version
await createHistoricalPrebuiltRuleAssetSavedObjects(es, [

View file

@ -90,7 +90,7 @@ export default ({ getService }: FtrProviderContext): void => {
beforeEach(async () => {
await deleteAllRules(supertest, log);
await deleteAllPrebuiltRuleAssets(es);
await deleteAllPrebuiltRuleAssets(es, log);
});
it('should allow user to install prebuilt rules from scratch, then install new rules and upgrade existing rules from the new package', async () => {

View file

@ -0,0 +1,53 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { DeleteByQueryResponse } from '@elastic/elasticsearch/lib/api/types';
import { ToolingLog } from '@kbn/tooling-log';
// Default number of retries (re-runs after the first attempt) when conflicts occur
const RETRY_ATTEMPTS = 2;
// Default delay between retries when conflicts occur, in milliseconds
// (the value ends up as the `setTimeout` delay in `waitBeforeNextRetry`)
const RETRY_DELAY = 200;
/**
 * Runs an Elasticsearch deleteByQuery operation and runs it again when the
 * response lists a failure with status 409 (Conflict), up to a maximum number
 * of retries.
 *
 * Only conflicts reported inside `response.failures` are considered; a
 * rejection of `operation` itself is not caught and propagates to the caller.
 *
 * The type parameter `T` is unused; it is kept so the signature stays
 * unchanged for existing callers.
 *
 * @param logger Receives a debug message before each retry and an error
 * message when the retries are exhausted.
 * @param name Label used as the prefix of the log and error messages.
 * @param operation Callback that performs the deleteByQuery request. It is
 * invoked once per attempt.
 * @param retries Number of retries still allowed after the current attempt.
 * @param retryDelay Delay handed to `waitBeforeNextRetry` before every retry.
 * @returns The first response that contains no 409 failure. It may still
 * contain failures with other statuses.
 * @throws Error when the response contains a 409 failure and no retries are left.
 */
export async function retryIfDeleteByQueryConflicts<T>(
  logger: ToolingLog,
  name: string,
  operation: () => Promise<DeleteByQueryResponse>,
  retries: number = RETRY_ATTEMPTS,
  retryDelay: number = RETRY_DELAY
): Promise<DeleteByQueryResponse> {
  const operationResult = await operation();

  const failures = operationResult.failures ?? [];
  const hasConflict = failures.some((failure) => failure.status === 409);
  if (!hasConflict) {
    return operationResult;
  }

  // if no retries left, throw it
  if (retries <= 0) {
    logger.error(`${name} conflict, exceeded retries`);
    throw new Error(`${name} conflict, exceeded retries`);
  }

  // Otherwise, delay a bit before retrying
  logger.debug(`${name} conflict, retrying ...`);
  await waitBeforeNextRetry(retryDelay);

  // Forward `retryDelay` as well, so a caller-supplied delay applies to every
  // retry and not only to the first one.
  return await retryIfDeleteByQueryConflicts(logger, name, operation, retries - 1, retryDelay);
}
/**
 * Pauses the caller: the returned promise settles once a `setTimeout` timer
 * armed with `retryDelay` has fired.
 */
async function waitBeforeNextRetry(retryDelay: number): Promise<void> {
  const timerElapsed = new Promise((done) => {
    setTimeout(done, retryDelay);
  });
  await timerElapsed;
}

View file

@ -4,20 +4,26 @@
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import type { ToolingLog } from '@kbn/tooling-log';
import type { Client } from '@elastic/elasticsearch';
import { SECURITY_SOLUTION_SAVED_OBJECT_INDEX } from '@kbn/core-saved-objects-server';
import { retryIfDeleteByQueryConflicts } from '../../retry_delete_by_query_conflicts';
/**
* Remove all prebuilt rule assets from the security solution savedObjects index
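* @param es The Elasticsearch client
* @param logger Logger passed to `retryIfDeleteByQueryConflicts`, which uses it to report retries on 409 conflicts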
*/
export const deleteAllPrebuiltRuleAssets = async (es: Client): Promise<void> => {
await es.deleteByQuery({
index: SECURITY_SOLUTION_SAVED_OBJECT_INDEX,
q: 'type:security-rule',
wait_for_completion: true,
refresh: true,
body: {},
export const deleteAllPrebuiltRuleAssets = async (
  es: Client,
  logger: ToolingLog
): Promise<void> => {
  // The request is wrapped in a callback so the retry helper can issue it
  // again when the response reports a conflict.
  const deleteRuleAssets = async () => {
    const response = await es.deleteByQuery({
      index: SECURITY_SOLUTION_SAVED_OBJECT_INDEX,
      q: 'type:security-rule',
      wait_for_completion: true,
      refresh: true,
      body: {},
    });
    return response;
  };

  await retryIfDeleteByQueryConflicts(logger, deleteAllPrebuiltRuleAssets.name, deleteRuleAssets);
};

View file

@ -4,20 +4,23 @@
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import type { ToolingLog } from '@kbn/tooling-log';
import type { Client } from '@elastic/elasticsearch';
import { SECURITY_SOLUTION_SAVED_OBJECT_INDEX } from '@kbn/core-saved-objects-server';
import { retryIfDeleteByQueryConflicts } from '../../retry_delete_by_query_conflicts';
/**
* Remove all timelines from the security solution savedObjects index
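* @param es The Elasticsearch client
* @param logger Logger passed to `retryIfDeleteByQueryConflicts`, which uses it to report retries on 409 conflicts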
*/
export const deleteAllTimelines = async (es: Client): Promise<void> => {
await es.deleteByQuery({
index: SECURITY_SOLUTION_SAVED_OBJECT_INDEX,
q: 'type:siem-ui-timeline',
wait_for_completion: true,
refresh: true,
body: {},
export const deleteAllTimelines = async (es: Client, logger: ToolingLog): Promise<void> => {
  // The request is wrapped in a callback so the retry helper can issue it
  // again when the response reports a conflict.
  const deleteTimelines = async () => {
    const response = await es.deleteByQuery({
      index: SECURITY_SOLUTION_SAVED_OBJECT_INDEX,
      q: 'type:siem-ui-timeline',
      wait_for_completion: true,
      refresh: true,
      body: {},
    });
    return response;
  };

  await retryIfDeleteByQueryConflicts(logger, deleteAllTimelines.name, deleteTimelines);
};