[Fleet] Add retries w/ backoff to Fleet setup on Kibana boot (#167246)

Co-authored-by: kibanamachine <42973632+kibanamachine@users.noreply.github.com>
Co-authored-by: Julia Bardi <90178898+juliaElastic@users.noreply.github.com>
This commit is contained in:
Josh Dover 2023-09-28 14:34:50 +02:00 committed by GitHub
parent 239e50389f
commit a42d601fe5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 42 additions and 5 deletions

View file

@ -7,6 +7,7 @@ xpack.fleet.internal.disableILMPolicies: true
xpack.fleet.internal.disableProxies: true
xpack.fleet.internal.activeAgentsSoftLimit: 25000
xpack.fleet.internal.onlyAllowAgentUpgradeToKnownVersions: true
xpack.fleet.internal.retrySetupOnBoot: true
# Cloud links
xpack.cloud.base_url: 'https://cloud.elastic.co'

View file

@ -888,6 +888,7 @@
"email-addresses": "^5.0.0",
"execa": "^5.1.1",
"expiry-js": "0.1.7",
"exponential-backoff": "^3.1.1",
"extract-zip": "^2.0.1",
"fast-deep-equal": "^3.1.1",
"fflate": "^0.6.9",

View file

@ -51,6 +51,7 @@ export interface FleetConfigType {
fleetServerStandalone: boolean;
onlyAllowAgentUpgradeToKnownVersions: boolean;
activeAgentsSoftLimit?: number;
retrySetupOnBoot: boolean;
registry: {
kibanaVersionCheckEnabled: boolean;
capabilities: string[];

View file

@ -188,6 +188,7 @@ export const config: PluginConfigDescriptor = {
min: 0,
})
),
retrySetupOnBoot: schema.boolean({ defaultValue: false }),
registry: schema.object(
{
kibanaVersionCheckEnabled: schema.boolean({ defaultValue: true }),

View file

@ -5,6 +5,7 @@
* 2.0.
*/
import { backOff } from 'exponential-backoff';
import type { Observable } from 'rxjs';
import { BehaviorSubject } from 'rxjs';
import { take, filter } from 'rxjs/operators';
@ -532,9 +533,39 @@ export class FleetPlugin
)
.toPromise();
await setupFleet(
new SavedObjectsClient(core.savedObjects.createInternalRepository()),
core.elasticsearch.client.asInternalUser
// Retry Fleet setup with exponential backoff. The task is re-invoked until it resolves or
// the attempt budget below is exhausted; a final failure rejects out of `backOff` and is
// handled by the enclosing `catch`.
await backOff(
  async () => {
    await setupFleet(
      new SavedObjectsClient(core.savedObjects.createInternalRepository()),
      core.elasticsearch.client.asInternalUser
    );
  },
  {
    // We only retry when this feature flag is enabled; otherwise a single attempt is made
    numOfAttempts: this.configInitialValue.internal?.retrySetupOnBoot ? Infinity : 1,
    // 250ms initial backoff
    startingDelay: 250,
    // 5m max backoff
    maxDelay: 60000 * 5,
    // Each delay is double the previous one, capped at `maxDelay`
    timeMultiple: 2,
    // avoid HA contention with other Kibana instances
    jitter: 'full',
    retry: (error: unknown, attemptCount: number) => {
      const summary = `Fleet setup attempt ${attemptCount} failed, will retry after backoff`;
      // The log meta's `error.message` is meant to be text: passing the raw Error object
      // loses the message once the meta is serialized, so extract the string first.
      const errorMessage = error instanceof Error ? error.message : String(error);
      logger.debug(summary, { error: { message: errorMessage } });
      // Status stays `available` while retrying; the failed attempt is surfaced through
      // the summary and meta only.
      this.fleetStatus$.next({
        level: ServiceStatusLevels.available,
        summary,
        meta: {
          attemptCount,
          error,
        },
      });
      // Always ask for another attempt; `numOfAttempts` is what bounds the retries.
      return true;
    },
  }
);
this.fleetStatus$.next({
@ -542,8 +573,7 @@ export class FleetPlugin
summary: 'Fleet is available',
});
} catch (error) {
logger.warn('Fleet setup failed');
logger.warn(error);
logger.warn('Fleet setup failed', { error: { message: error } });
this.fleetStatus$.next({
// As long as Fleet has a dependency on EPR, we can't reliably set Kibana status to `unavailable` here.

View file

@ -135,6 +135,7 @@ describe('_installPackage', () => {
disableProxies: false,
fleetServerStandalone: false,
onlyAllowAgentUpgradeToKnownVersions: false,
retrySetupOnBoot: false,
registry: {
kibanaVersionCheckEnabled: true,
capabilities: [],
@ -192,6 +193,7 @@ describe('_installPackage', () => {
disableILMPolicies: false,
fleetServerStandalone: false,
onlyAllowAgentUpgradeToKnownVersions: false,
retrySetupOnBoot: false,
registry: {
kibanaVersionCheckEnabled: true,
capabilities: [],
@ -265,6 +267,7 @@ describe('_installPackage', () => {
disableProxies: false,
fleetServerStandalone: false,
onlyAllowAgentUpgradeToKnownVersions: false,
retrySetupOnBoot: false,
registry: {
kibanaVersionCheckEnabled: true,
capabilities: [],