mirror of
https://github.com/elastic/kibana.git
synced 2025-04-24 09:48:58 -04:00
# Backport This will backport the following commits from `main` to `8.16`: - [implements backoff delay in usage counters service (#206363)](https://github.com/elastic/kibana/pull/206363) <!--- Backport version: 9.4.3 --> ### Questions ? Please refer to the [Backport tool documentation](https://github.com/sqren/backport) <!--BACKPORT [{"author":{"name":"Christiane (Tina) Heiligers","email":"christiane.heiligers@elastic.co"},"sourceCommit":{"committedDate":"2025-01-15T18:42:44Z","message":"implements backoff delay in usage counters service (#206363)\n\nfix https://github.com/elastic/kibana/issues/192829\r\n\r\n## Summary\r\n\r\nUsage counters lose counts when there's a version conflict exception to\r\nincrement counters.\r\nATM, the service retries `incrementCounter` immediately on error, which\r\ncould also fail if another Kibana tries at the same time.\r\nUsing a backoff retry reduces the likelihood of a version conflict\r\nexception. \r\n\r\nThis PR replaces the retry with an exponential `backoffDelay` that will\r\nwait longer between retry attempts.\r\n\r\n\r\n- [x] [Unit or functional\r\ntests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)\r\nwere updated or added to match the most common scenarios\r\n- [x] [Flaky Test\r\nRunner](https://ci-stats.kibana.dev/trigger_flaky_test_runner/1) was\r\nused on any tests changed\r\n(https://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/7701)\r\n\r\n---------\r\n\r\nCo-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com>","sha":"fd3b4ed590006212855c1a80994649eacbf41f31","branchLabelMapping":{"^v9.0.0$":"main","^v8.18.0$":"8.x","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["release_note:skip","v9.0.0","backport:all-open"],"title":"implements backoff delay in usage counters service","number":206363,"url":"https://github.com/elastic/kibana/pull/206363","mergeCommit":{"message":"implements backoff delay in usage counters service (#206363)\n\nfix https://github.com/elastic/kibana/issues/192829\r\n\r\n## Summary\r\n\r\nUsage counters lose counts when there's a version conflict exception to\r\nincrement counters.\r\nATM, the service retries `incrementCounter` immediately on error, which\r\ncould also fail if another Kibana tries at the same time.\r\nUsing a backoff retry reduces the likelihood of a version conflict\r\nexception. \r\n\r\nThis PR replaces the retry with an exponential `backoffDelay` that will\r\nwait longer between retry attempts.\r\n\r\n\r\n- [x] [Unit or functional\r\ntests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)\r\nwere updated or added to match the most common scenarios\r\n- [x] [Flaky Test\r\nRunner](https://ci-stats.kibana.dev/trigger_flaky_test_runner/1) was\r\nused on any tests changed\r\n(https://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/7701)\r\n\r\n---------\r\n\r\nCo-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com>","sha":"fd3b4ed590006212855c1a80994649eacbf41f31"}},"sourceBranch":"main","suggestedTargetBranches":[],"targetPullRequestStates":[{"branch":"main","label":"v9.0.0","branchLabelMappingKey":"^v9.0.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/206363","number":206363,"mergeCommit":{"message":"implements backoff delay in usage counters service (#206363)\n\nfix https://github.com/elastic/kibana/issues/192829\r\n\r\n## Summary\r\n\r\nUsage counters lose counts when there's a version conflict exception to\r\nincrement counters.\r\nATM, the service retries `incrementCounter` immediately on error, which\r\ncould also fail if another Kibana tries at the same time.\r\nUsing a backoff retry reduces the likelihood of a version conflict\r\nexception. \r\n\r\nThis PR replaces the retry with an exponential `backoffDelay` that will\r\nwait longer between retry attempts.\r\n\r\n\r\n- [x] [Unit or functional\r\ntests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)\r\nwere updated or added to match the most common scenarios\r\n- [x] [Flaky Test\r\nRunner](https://ci-stats.kibana.dev/trigger_flaky_test_runner/1) was\r\nused on any tests changed\r\n(https://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/7701)\r\n\r\n---------\r\n\r\nCo-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com>","sha":"fd3b4ed590006212855c1a80994649eacbf41f31"}}]}] BACKPORT--> Co-authored-by: Christiane (Tina) Heiligers <christiane.heiligers@elastic.co>
This commit is contained in:
parent
e9944f8a3f
commit
6f206300e1
5 changed files with 67 additions and 45 deletions
|
@ -10,3 +10,4 @@
|
|||
export const KIBANA_STATS_TYPE = 'kibana_stats';
|
||||
export const DEFAULT_MAXIMUM_WAIT_TIME_FOR_ALL_COLLECTORS_IN_S = 1;
|
||||
export const MAIN_APP_DEFAULT_VIEW_ID = 'main';
|
||||
export const USAGE_COUNTERS_BACKOFF_DELAY_IN_MS = 500;
|
||||
|
|
|
@ -14,7 +14,7 @@ import { DEFAULT_MAXIMUM_WAIT_TIME_FOR_ALL_COLLECTORS_IN_S } from '../common/con
|
|||
export const configSchema = schema.object({
|
||||
usageCounters: schema.object({
|
||||
enabled: schema.boolean({ defaultValue: true }),
|
||||
retryCount: schema.number({ defaultValue: 1 }),
|
||||
retryCount: schema.number({ defaultValue: 3 }),
|
||||
bufferDuration: schema.duration({ defaultValue: '5s' }),
|
||||
}),
|
||||
uiCounters: schema.object({
|
||||
|
|
|
@ -58,7 +58,6 @@ export class UsageCounter implements IUsageCounter {
|
|||
incrementBy = 1,
|
||||
namespace,
|
||||
} = params;
|
||||
|
||||
this.counter$.next({
|
||||
domainId: this.domainId,
|
||||
counterName,
|
||||
|
|
|
@ -25,9 +25,10 @@ const registerUsageCountersRollupsMock = registerUsageCountersRollups as jest.Mo
|
|||
typeof registerUsageCountersRollups
|
||||
>;
|
||||
|
||||
const tick = () => {
|
||||
// optionally advance test timers after a delay
|
||||
const tickWithDelay = (delay = 1) => {
|
||||
jest.useRealTimers();
|
||||
return new Promise((resolve) => setTimeout(resolve, 1));
|
||||
return new Promise((resolve) => setTimeout(resolve, delay));
|
||||
};
|
||||
|
||||
describe('UsageCountersService', () => {
|
||||
|
@ -188,51 +189,62 @@ describe('UsageCountersService', () => {
|
|||
]
|
||||
`);
|
||||
});
|
||||
// requires extended test runtime because of exponential backoff delay for retries
|
||||
it(
|
||||
'retries errors by `retryCount` times before failing to store',
|
||||
async () => {
|
||||
const retryConst = 2;
|
||||
const usageCountersService = new UsageCountersService({
|
||||
logger,
|
||||
retryCount: retryConst,
|
||||
bufferDurationMs: 50000,
|
||||
});
|
||||
|
||||
it('retries errors by `retryCount` times before failing to store', async () => {
|
||||
const usageCountersService = new UsageCountersService({
|
||||
logger,
|
||||
retryCount: 1,
|
||||
bufferDurationMs,
|
||||
});
|
||||
const mockRepository = coreStart.savedObjects.createInternalRepository();
|
||||
const mockError = new Error('failed');
|
||||
const mockIncrementCounter = jest.fn().mockImplementation((_, key) => {
|
||||
switch (key) {
|
||||
case 'test-counter:counterA:count:server:20210409':
|
||||
throw mockError;
|
||||
case 'test-counter:counterB:count:server:20210409':
|
||||
return 'pass';
|
||||
default:
|
||||
throw new Error(`unknown key ${key}`);
|
||||
}
|
||||
});
|
||||
|
||||
const mockRepository = coreStart.savedObjects.createInternalRepository();
|
||||
const mockError = new Error('failed.');
|
||||
const mockIncrementCounter = jest.fn().mockImplementation((_, key) => {
|
||||
switch (key) {
|
||||
case 'test-counter:counterA:count:server:20210409':
|
||||
throw mockError;
|
||||
case 'test-counter:counterB:count:server:20210409':
|
||||
return 'pass';
|
||||
default:
|
||||
throw new Error(`unknown key ${key}`);
|
||||
}
|
||||
});
|
||||
mockRepository.incrementCounter = mockIncrementCounter;
|
||||
|
||||
mockRepository.incrementCounter = mockIncrementCounter;
|
||||
coreStart.savedObjects.createInternalRepository.mockReturnValue(mockRepository);
|
||||
const { createUsageCounter } = usageCountersService.setup(coreSetup);
|
||||
jest.useFakeTimers();
|
||||
const usageCounter = createUsageCounter('test-counter');
|
||||
|
||||
coreStart.savedObjects.createInternalRepository.mockReturnValue(mockRepository);
|
||||
const { createUsageCounter } = usageCountersService.setup(coreSetup);
|
||||
jest.useFakeTimers();
|
||||
const usageCounter = createUsageCounter('test-counter');
|
||||
usageCountersService.start(coreStart);
|
||||
usageCounter.incrementCounter({ counterName: 'counterA' });
|
||||
usageCounter.incrementCounter({ counterName: 'counterB' });
|
||||
jest.runOnlyPendingTimers();
|
||||
|
||||
usageCountersService.start(coreStart);
|
||||
usageCounter.incrementCounter({ counterName: 'counterA' });
|
||||
usageCounter.incrementCounter({ counterName: 'counterB' });
|
||||
jest.runOnlyPendingTimers();
|
||||
|
||||
// wait for retries to kick in on next scheduler call
|
||||
await tick();
|
||||
// number of incrementCounter calls + number of retries
|
||||
expect(mockIncrementCounter).toBeCalledTimes(2 + 1);
|
||||
expect(logger.debug).toHaveBeenNthCalledWith(1, 'Store counters into savedObjects', {
|
||||
kibana: {
|
||||
usageCounters: {
|
||||
results: [mockError, 'pass'],
|
||||
// wait for retries to kick in on next scheduler call
|
||||
await tickWithDelay(5000);
|
||||
// number of incrementCounter calls + number of retries
|
||||
expect(mockIncrementCounter).toBeCalledTimes(2 + retryConst);
|
||||
// assert counterA increment error warning logs
|
||||
expect(logger.warn).toHaveBeenNthCalledWith(
|
||||
2,
|
||||
`${mockError}, retrying attempt ${retryConst}`
|
||||
);
|
||||
expect(logger.warn).toHaveBeenNthCalledWith(3, mockError);
|
||||
expect(logger.debug).toHaveBeenNthCalledWith(1, 'Store counters into savedObjects', {
|
||||
kibana: {
|
||||
usageCounters: {
|
||||
results: [mockError, 'pass'],
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
});
|
||||
});
|
||||
},
|
||||
10 * 1000
|
||||
);
|
||||
|
||||
it('buffers counters within `bufferDurationMs` time', async () => {
|
||||
const usageCountersService = new UsageCountersService({
|
||||
|
@ -264,7 +276,7 @@ describe('UsageCountersService', () => {
|
|||
jest.runOnlyPendingTimers();
|
||||
|
||||
// wait for debounce to kick in on next scheduler call
|
||||
await tick();
|
||||
await tickWithDelay();
|
||||
expect(mockIncrementCounter).toBeCalledTimes(2);
|
||||
expect(mockIncrementCounter.mock.results.map(({ value }) => value)).toMatchInlineSnapshot(`
|
||||
Array [
|
||||
|
|
|
@ -33,6 +33,7 @@ import {
|
|||
} from './saved_objects';
|
||||
import { registerUsageCountersRollups } from './rollups';
|
||||
import { searchUsageCounters } from './search';
|
||||
import { USAGE_COUNTERS_BACKOFF_DELAY_IN_MS } from '../../common/constants';
|
||||
|
||||
interface UsageCountersLogMeta extends LogMeta {
|
||||
kibana: { usageCounters: { results: unknown[] } };
|
||||
|
@ -159,6 +160,9 @@ export class UsageCountersService {
|
|||
};
|
||||
};
|
||||
|
||||
private backoffDelay = (attempt: number) =>
|
||||
Math.pow(2, attempt) * USAGE_COUNTERS_BACKOFF_DELAY_IN_MS; // exponential backoff: 500ms, 1000ms, 2000ms
|
||||
|
||||
private storeDate$(
|
||||
counters: UsageCounters.v1.CounterMetric[],
|
||||
soRepository: Pick<SavedObjectsRepository, 'incrementCounter'>
|
||||
|
@ -166,7 +170,13 @@ export class UsageCountersService {
|
|||
return Rx.forkJoin(
|
||||
counters.map((metric) =>
|
||||
Rx.defer(() => storeCounter({ metric, soRepository })).pipe(
|
||||
Rx.retry(this.retryCount),
|
||||
Rx.retry({
|
||||
count: this.retryCount,
|
||||
delay: (error, retryIndex) => {
|
||||
this.logger.warn(`Error: ${error.message}, retrying attempt ${retryIndex}`); // extra warning logger
|
||||
return Rx.timer(this.backoffDelay(retryIndex));
|
||||
},
|
||||
}),
|
||||
Rx.catchError((error) => {
|
||||
this.logger.warn(error);
|
||||
return Rx.of(error);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue