make isInlineScriptingEnabled resilient to ES errors (#170208)

## Summary

Fix https://github.com/elastic/kibana/issues/163787

Change the way `isInlineScriptingEnabled` function to retry retryable
errors from ES (similar to how the valid connection or migration ES
calls do)
This commit is contained in:
Pierre Gayvallet 2023-10-31 13:55:03 +01:00 committed by GitHub
parent d26b3736ce
commit 8868d08745
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 220 additions and 34 deletions

View file

@ -30,4 +30,5 @@ export { CoreElasticsearchRouteHandlerContext } from './src/elasticsearch_route_
export { retryCallCluster, migrationRetryCallCluster } from './src/retry_call_cluster';
export { isInlineScriptingEnabled } from './src/is_scripting_enabled';
export { getCapabilitiesFromClient } from './src/get_capabilities';
export { isRetryableEsClientError } from './src/retryable_es_client_errors';
export type { ClusterInfo } from './src/get_cluster_info';

View file

@ -0,0 +1,15 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
export const isRetryableEsClientErrorMock = jest.fn();
jest.doMock('./retryable_es_client_errors', () => {
return {
isRetryableEsClientError: isRetryableEsClientErrorMock,
};
});

View file

@ -6,6 +6,7 @@
* Side Public License, v 1.
*/
import { isRetryableEsClientErrorMock } from './is_scripting_enabled.test.mocks';
import * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import { elasticsearchClientMock } from '@kbn/core-elasticsearch-client-server-mocks';
import { isInlineScriptingEnabled } from './is_scripting_enabled';
@ -94,4 +95,58 @@ describe('isInlineScriptingEnabled', () => {
expect(await isInlineScriptingEnabled({ client })).toEqual(false);
});
describe('resiliency', () => {
beforeEach(() => {
isRetryableEsClientErrorMock.mockReset();
});
const mockSuccessOnce = () => {
client.cluster.getSettings.mockResolvedValueOnce({
transient: {},
persistent: {},
defaults: {},
});
};
const mockErrorOnce = () => {
client.cluster.getSettings.mockResponseImplementationOnce(() => {
throw Error('ERR CON REFUSED');
});
};
it('retries the ES api call in case of retryable error', async () => {
isRetryableEsClientErrorMock.mockReturnValue(true);
mockErrorOnce();
mockSuccessOnce();
await expect(isInlineScriptingEnabled({ client, maxRetryDelay: 1 })).resolves.toEqual(true);
expect(client.cluster.getSettings).toHaveBeenCalledTimes(2);
});
it('throws in case of non-retryable error', async () => {
isRetryableEsClientErrorMock.mockReturnValue(false);
mockErrorOnce();
mockSuccessOnce();
await expect(isInlineScriptingEnabled({ client, maxRetryDelay: 0.1 })).rejects.toThrowError(
'ERR CON REFUSED'
);
});
it('retries up to `maxRetries` times', async () => {
isRetryableEsClientErrorMock.mockReturnValue(true);
mockErrorOnce();
mockErrorOnce();
mockErrorOnce();
mockSuccessOnce();
await expect(
isInlineScriptingEnabled({ client, maxRetryDelay: 0.1, maxRetries: 2 })
).rejects.toThrowError('ERR CON REFUSED');
expect(client.cluster.getSettings).toHaveBeenCalledTimes(3);
});
});
});

View file

@ -6,27 +6,48 @@
* Side Public License, v 1.
*/
import { defer, map, retry, timer, firstValueFrom, throwError } from 'rxjs';
import type { ElasticsearchClient } from '@kbn/core-elasticsearch-server';
import { isRetryableEsClientError } from './retryable_es_client_errors';
const scriptAllowedTypesKey = 'script.allowed_types';
export const isInlineScriptingEnabled = async ({
client,
maxRetries = 20,
maxRetryDelay = 64,
}: {
client: ElasticsearchClient;
maxRetries?: number;
maxRetryDelay?: number;
}): Promise<boolean> => {
const settings = await client.cluster.getSettings({
include_defaults: true,
flat_settings: true,
});
return firstValueFrom(
defer(() => {
return client.cluster.getSettings({
include_defaults: true,
flat_settings: true,
});
}).pipe(
retry({
count: maxRetries,
delay: (error, retryIndex) => {
if (isRetryableEsClientError(error)) {
const retryDelay = 1000 * Math.min(Math.pow(2, retryIndex), maxRetryDelay); // 2s, 4s, 8s, 16s, 32s, 64s, 64s, 64s ...
return timer(retryDelay);
} else {
return throwError(error);
}
},
}),
map((settings) => {
const scriptAllowedTypes: string[] =
settings.transient[scriptAllowedTypesKey] ??
settings.persistent[scriptAllowedTypesKey] ??
settings.defaults![scriptAllowedTypesKey] ??
[];
// priority: transient -> persistent -> default
const scriptAllowedTypes: string[] =
settings.transient[scriptAllowedTypesKey] ??
settings.persistent[scriptAllowedTypesKey] ??
settings.defaults![scriptAllowedTypesKey] ??
[];
// when unspecified, the setting as a default `[]` value that means that both scriptings are allowed.
return scriptAllowedTypes.length === 0 || scriptAllowedTypes.includes('inline');
return scriptAllowedTypes.length === 0 || scriptAllowedTypes.includes('inline');
})
)
);
};

View file

@ -0,0 +1,73 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
import { errors as esErrors } from '@elastic/elasticsearch';
import { elasticsearchClientMock } from '@kbn/core-elasticsearch-client-server-mocks';
import { isRetryableEsClientError } from './retryable_es_client_errors';
describe('isRetryableEsClientError', () => {
describe('returns `false` for', () => {
test('non-retryable response errors', async () => {
const error = new esErrors.ResponseError(
elasticsearchClientMock.createApiResponse({
body: { error: { type: 'cluster_block_exception' } },
statusCode: 400,
})
);
expect(isRetryableEsClientError(error)).toEqual(false);
});
});
describe('returns `true` for', () => {
it('NoLivingConnectionsError', () => {
const error = new esErrors.NoLivingConnectionsError(
'reason',
elasticsearchClientMock.createApiResponse()
);
expect(isRetryableEsClientError(error)).toEqual(true);
});
it('ConnectionError', () => {
const error = new esErrors.ConnectionError(
'reason',
elasticsearchClientMock.createApiResponse()
);
expect(isRetryableEsClientError(error)).toEqual(true);
});
it('TimeoutError', () => {
const error = new esErrors.TimeoutError(
'reason',
elasticsearchClientMock.createApiResponse()
);
expect(isRetryableEsClientError(error)).toEqual(true);
});
it('ResponseError of type snapshot_in_progress_exception', () => {
const error = new esErrors.ResponseError(
elasticsearchClientMock.createApiResponse({
body: { error: { type: 'snapshot_in_progress_exception' } },
})
);
expect(isRetryableEsClientError(error)).toEqual(true);
});
it.each([503, 401, 403, 408, 410, 429])('ResponseError with %p status code', (statusCode) => {
const error = new esErrors.ResponseError(
elasticsearchClientMock.createApiResponse({
statusCode,
body: { error: { type: 'reason' } },
})
);
expect(isRetryableEsClientError(error)).toEqual(true);
});
});
});

View file

@ -0,0 +1,40 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
import { errors as EsErrors } from '@elastic/elasticsearch';
const retryResponseStatuses = [
503, // ServiceUnavailable
401, // AuthorizationException
403, // AuthenticationException
408, // RequestTimeout
410, // Gone
429, // TooManyRequests -> ES circuit breaker
];
/**
* Returns true if the given elasticsearch error should be retried
* by retry-based resiliency systems such as the SO migration, false otherwise.
*/
export const isRetryableEsClientError = (e: EsErrors.ElasticsearchClientError): boolean => {
if (
e instanceof EsErrors.NoLivingConnectionsError ||
e instanceof EsErrors.ConnectionError ||
e instanceof EsErrors.TimeoutError ||
(e instanceof EsErrors.ResponseError &&
(retryResponseStatuses.includes(e?.statusCode!) ||
// ES returns a 400 Bad Request when trying to close or delete an
// index while snapshots are in progress. This should have been a 503
// so once https://github.com/elastic/elasticsearch/issues/65883 is
// fixed we can remove this.
e?.body?.error?.type === 'snapshot_in_progress_exception'))
) {
return true;
}
return false;
};

View file

@ -8,15 +8,7 @@
import * as Either from 'fp-ts/lib/Either';
import { errors as EsErrors } from '@elastic/elasticsearch';
const retryResponseStatuses = [
503, // ServiceUnavailable
401, // AuthorizationException
403, // AuthenticationException
408, // RequestTimeout
410, // Gone
429, // TooManyRequests -> ES circuit breaker
];
import { isRetryableEsClientError } from '@kbn/core-elasticsearch-server-internal';
export interface RetryableEsClientError {
type: 'retryable_es_client_error';
@ -27,18 +19,7 @@ export interface RetryableEsClientError {
export const catchRetryableEsClientErrors = (
e: EsErrors.ElasticsearchClientError
): Either.Either<RetryableEsClientError, never> => {
if (
e instanceof EsErrors.NoLivingConnectionsError ||
e instanceof EsErrors.ConnectionError ||
e instanceof EsErrors.TimeoutError ||
(e instanceof EsErrors.ResponseError &&
(retryResponseStatuses.includes(e?.statusCode!) ||
// ES returns a 400 Bad Request when trying to close or delete an
// index while snapshots are in progress. This should have been a 503
// so once https://github.com/elastic/elasticsearch/issues/65883 is
// fixed we can remove this.
e?.body?.error?.type === 'snapshot_in_progress_exception'))
) {
if (isRetryableEsClientError(e)) {
return Either.left({
type: 'retryable_es_client_error' as const,
message: e?.message,