mirror of
https://github.com/elastic/kibana.git
synced 2025-04-23 01:13:23 -04:00
[Defend Workflows][8.12] Unblock fleet setup when validating thousands of uninstall tokens (#174535)
## Summary
To unblock Fleet setup when dealing with thousands of agent policies,
the query that's used to get decrypted SOs is now performed in batches.
The size of the batch is related to
[`max_clause_count`](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-settings.html),
and it looks like the batch size needs to be smaller than _half of
that value_.
`max_clause_count` is at least `1024`, so going with `500` should work
even on environments with the scarcest resources.
With this modification, Fleet setup is successfully performed. Other
issues (like errors in the Uninstall Tokens table) have not arisen.
**TODO:** updating/adding tests
## Testing
- you need to find out the value of
`indices.query.bool.max_clause_count` on your local machine (is there an
easy way?)
- to reproduce the issue, check out `8.12`,
- add a lot of Agent Policies: so far we experienced the issue with 2.7k
policies on one machine and 3.2k on another. here's a modified version
of @dasansol92 's script for adding Agent Policies (without Defend):
e6f24b2d7d
- restart Kibana
- you should see the `too_many_nested_clauses` error + you should not be
able to open any Fleet pages in Kibana
(if not, add more policies)
- now, switch to this branch without stopping ES
- restart Kibana: you should see that fleet setup is successful + you
should be able to access Fleet pages
## Checklist
Delete any items that are not applicable to this PR.
- [x] [Unit or functional
tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)
were updated or added to match the most common scenarios
- [ ] [Flaky Test
Runner](https://ci-stats.kibana.dev/trigger_flaky_test_runner/1) was
used on any tests changed
---------
Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com>
This commit is contained in:
parent
e9092c0a17
commit
a7d3d2203b
4 changed files with 101 additions and 7 deletions
|
@ -39,6 +39,7 @@ export interface FleetConfigType {
|
|||
};
|
||||
setup?: {
|
||||
agentPolicySchemaUpgradeBatchSize?: number;
|
||||
uninstallTokenVerificationBatchSize?: number;
|
||||
};
|
||||
developer?: {
|
||||
maxAgentPoliciesWithInactivityTimeout?: number;
|
||||
|
|
|
@ -150,6 +150,7 @@ export const config: PluginConfigDescriptor = {
|
|||
setup: schema.maybe(
|
||||
schema.object({
|
||||
agentPolicySchemaUpgradeBatchSize: schema.maybe(schema.number()),
|
||||
uninstallTokenVerificationBatchSize: schema.maybe(schema.number()),
|
||||
})
|
||||
),
|
||||
developer: schema.object({
|
||||
|
|
|
@ -13,6 +13,8 @@ import type { SavedObjectsClientContract } from '@kbn/core/server';
|
|||
import type { EncryptedSavedObjectsClient } from '@kbn/encrypted-saved-objects-plugin/server';
|
||||
import { encryptedSavedObjectsMock } from '@kbn/encrypted-saved-objects-plugin/server/mocks';
|
||||
|
||||
import { errors } from '@elastic/elasticsearch';
|
||||
|
||||
import { UninstallTokenError } from '../../../../common/errors';
|
||||
|
||||
import { SO_SEARCH_LIMIT } from '../../../../common';
|
||||
|
@ -527,6 +529,48 @@ describe('UninstallTokenService', () => {
|
|||
).resolves.toBeNull();
|
||||
});
|
||||
|
||||
describe('avoiding `too_many_nested_clauses` error', () => {
|
||||
it('performs one query if number of policies is smaller than batch size', async () => {
|
||||
mockCreatePointInTimeFinderAsInternalUser();
|
||||
await uninstallTokenService.checkTokenValidityForAllPolicies();
|
||||
|
||||
expect(esoClientMock.createPointInTimeFinderDecryptedAsInternalUser).toBeCalledTimes(1);
|
||||
expect(esoClientMock.createPointInTimeFinderDecryptedAsInternalUser).toBeCalledWith({
|
||||
filter:
|
||||
'fleet-uninstall-tokens.id: "test-so-id" or fleet-uninstall-tokens.id: "test-so-id-two"',
|
||||
perPage: 10000,
|
||||
type: 'fleet-uninstall-tokens',
|
||||
});
|
||||
});
|
||||
|
||||
it('performs multiple queries if number of policies is larger than batch size', async () => {
|
||||
// @ts-ignore
|
||||
appContextService.getConfig().setup = { uninstallTokenVerificationBatchSize: 1 };
|
||||
|
||||
mockCreatePointInTimeFinderAsInternalUser();
|
||||
|
||||
await uninstallTokenService.checkTokenValidityForAllPolicies();
|
||||
|
||||
expect(esoClientMock.createPointInTimeFinderDecryptedAsInternalUser).toBeCalledTimes(2);
|
||||
|
||||
expect(
|
||||
esoClientMock.createPointInTimeFinderDecryptedAsInternalUser
|
||||
).toHaveBeenNthCalledWith(1, {
|
||||
filter: 'fleet-uninstall-tokens.id: "test-so-id"',
|
||||
perPage: 10000,
|
||||
type: 'fleet-uninstall-tokens',
|
||||
});
|
||||
|
||||
expect(
|
||||
esoClientMock.createPointInTimeFinderDecryptedAsInternalUser
|
||||
).toHaveBeenNthCalledWith(2, {
|
||||
filter: 'fleet-uninstall-tokens.id: "test-so-id-two"',
|
||||
perPage: 10000,
|
||||
type: 'fleet-uninstall-tokens',
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
it('returns error if any of the tokens is missing', async () => {
|
||||
mockCreatePointInTimeFinderAsInternalUser([okaySO, missingTokenSO2]);
|
||||
|
||||
|
@ -597,6 +641,26 @@ describe('UninstallTokenService', () => {
|
|||
});
|
||||
});
|
||||
|
||||
it('returns error on `too_many_nested_clauses` error', async () => {
|
||||
// @ts-ignore
|
||||
const responseError = new errors.ResponseError({});
|
||||
responseError.message = 'this is a too_many_nested_clauses error';
|
||||
|
||||
esoClientMock.createPointInTimeFinderDecryptedAsInternalUser = jest
|
||||
.fn()
|
||||
.mockRejectedValueOnce(responseError);
|
||||
|
||||
await expect(
|
||||
uninstallTokenService.checkTokenValidityForAllPolicies()
|
||||
).resolves.toStrictEqual({
|
||||
error: new UninstallTokenError(
|
||||
'Failed to validate uninstall tokens: `too_many_nested_clauses` error received. ' +
|
||||
'Setting/decreasing the value of `xpack.fleet.setup.uninstallTokenVerificationBatchSize` in your kibana.yml should help. ' +
|
||||
`Current value is 500.`
|
||||
),
|
||||
});
|
||||
});
|
||||
|
||||
it('throws error in case of unknown error', async () => {
|
||||
esoClientMock.createPointInTimeFinderDecryptedAsInternalUser = jest
|
||||
.fn()
|
||||
|
|
|
@ -23,13 +23,15 @@ import type {
|
|||
import type { EncryptedSavedObjectsClient } from '@kbn/encrypted-saved-objects-plugin/server';
|
||||
import type { KibanaRequest } from '@kbn/core-http-server';
|
||||
import { SECURITY_EXTENSION_ID } from '@kbn/core-saved-objects-server';
|
||||
import { asyncForEach } from '@kbn/std';
|
||||
import { asyncForEach, asyncMap } from '@kbn/std';
|
||||
|
||||
import type {
|
||||
AggregationsTermsInclude,
|
||||
AggregationsTermsExclude,
|
||||
} from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
|
||||
|
||||
import { isResponseError } from '@kbn/es-errors';
|
||||
|
||||
import { UninstallTokenError } from '../../../../common/errors';
|
||||
|
||||
import type { GetUninstallTokensMetadataResponse } from '../../../../common/types/rest_spec/uninstall_token';
|
||||
|
@ -208,15 +210,31 @@ export class UninstallTokenService implements UninstallTokenServiceInterface {
|
|||
return [];
|
||||
}
|
||||
|
||||
const filter: string = tokenObjectHits
|
||||
.map(({ _id }) => {
|
||||
return `${UNINSTALL_TOKENS_SAVED_OBJECT_TYPE}.id: "${_id}"`;
|
||||
})
|
||||
.join(' or ');
|
||||
const filterEntries: string[] = tokenObjectHits.map(
|
||||
({ _id }) => `${UNINSTALL_TOKENS_SAVED_OBJECT_TYPE}.id: "${_id}"`
|
||||
);
|
||||
|
||||
return this.getDecryptedTokens({ filter });
|
||||
const uninstallTokenChunks: UninstallToken[][] = await asyncMap(
|
||||
chunk(filterEntries, this.getUninstallTokenVerificationBatchSize()),
|
||||
(entries) => {
|
||||
const filter = entries.join(' or ');
|
||||
return this.getDecryptedTokens({ filter });
|
||||
}
|
||||
);
|
||||
|
||||
return uninstallTokenChunks.flat();
|
||||
}
|
||||
|
||||
private getUninstallTokenVerificationBatchSize = () => {
|
||||
/** If `uninstallTokenVerificationBatchSize` is too large, we get an error of `too_many_nested_clauses`.
|
||||
* Assuming that `max_clause_count` >= 1024, and experiencing that batch size should be less than half
|
||||
* than `max_clause_count` with our current query, batch size below 512 should be okay on every env.
|
||||
*/
|
||||
const config = appContextService.getConfig();
|
||||
|
||||
return config?.setup?.uninstallTokenVerificationBatchSize ?? 500;
|
||||
};
|
||||
|
||||
private getDecryptedTokens = async (
|
||||
options: Partial<SavedObjectsCreatePointInTimeFinderOptions>
|
||||
): Promise<UninstallToken[]> => {
|
||||
|
@ -523,6 +541,16 @@ export class UninstallTokenService implements UninstallTokenServiceInterface {
|
|||
if (error instanceof UninstallTokenError) {
|
||||
// known errors are considered non-fatal
|
||||
return { error };
|
||||
} else if (isResponseError(error) && error.message.includes('too_many_nested_clauses')) {
|
||||
// `too_many_nested_clauses` is considered non-fatal
|
||||
const errorMessage =
|
||||
'Failed to validate uninstall tokens: `too_many_nested_clauses` error received. ' +
|
||||
'Setting/decreasing the value of `xpack.fleet.setup.uninstallTokenVerificationBatchSize` in your kibana.yml should help. ' +
|
||||
`Current value is ${this.getUninstallTokenVerificationBatchSize()}.`;
|
||||
|
||||
appContextService.getLogger().warn(`${errorMessage}: '${error}'`);
|
||||
|
||||
return { error: new UninstallTokenError(errorMessage) };
|
||||
} else {
|
||||
const errorMessage = 'Unknown error happened while checking Uninstall Tokens validity';
|
||||
appContextService.getLogger().error(`${errorMessage}: '${error}'`);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue