[8.17] implements backoff delay in usage counters service (#206363) (#206851)

# Backport

This will backport the following commits from `main` to `8.17`:
- [implements backoff delay in usage counters service
(#206363)](https://github.com/elastic/kibana/pull/206363)

<!--- Backport version: 9.4.3 -->

### Questions ?
Please refer to the [Backport tool
documentation](https://github.com/sqren/backport)

<!--BACKPORT [{"author":{"name":"Christiane (Tina)
Heiligers","email":"christiane.heiligers@elastic.co"},"sourceCommit":{"committedDate":"2025-01-15T18:42:44Z","message":"implements
backoff delay in usage counters service (#206363)\n\nfix
https://github.com/elastic/kibana/issues/192829\r\n\r\n##
Summary\r\n\r\nUsage counters lose counts when there's a version
conflict exception to\r\nincrement counters.\r\nATM, the service retries
`incrementCounter` immediately on error, which\r\ncould also fail if
another Kibana tries at the same time.\r\nUsing a backoff retry reduces
the likelihood of a version conflict\r\nexception.  \r\n\r\nThis PR
replaces the retry with an exponential `backoffDelay` that will\r\nwait
longer between retry attempts.\r\n\r\n\r\n- [x] [Unit or
functional\r\ntests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)\r\nwere
updated or added to match the most common scenarios\r\n- [x] [Flaky
Test\r\nRunner](https://ci-stats.kibana.dev/trigger_flaky_test_runner/1)
was\r\nused on any tests
changed\r\n(https://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/7701)\r\n\r\n---------\r\n\r\nCo-authored-by:
Elastic Machine
<elasticmachine@users.noreply.github.com>","sha":"fd3b4ed590006212855c1a80994649eacbf41f31","branchLabelMapping":{"^v9.0.0$":"main","^v8.18.0$":"8.x","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["release_note:skip","v9.0.0","backport:all-open"],"title":"implements
backoff delay in usage counters
service","number":206363,"url":"https://github.com/elastic/kibana/pull/206363","mergeCommit":{"message":"implements
backoff delay in usage counters service (#206363)\n\nfix
https://github.com/elastic/kibana/issues/192829\r\n\r\n##
Summary\r\n\r\nUsage counters lose counts when there's a version
conflict exception to\r\nincrement counters.\r\nATM, the service retries
`incrementCounter` immediately on error, which\r\ncould also fail if
another Kibana tries at the same time.\r\nUsing a backoff retry reduces
the likelihood of a version conflict\r\nexception.  \r\n\r\nThis PR
replaces the retry with an exponential `backoffDelay` that will\r\nwait
longer between retry attempts.\r\n\r\n\r\n- [x] [Unit or
functional\r\ntests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)\r\nwere
updated or added to match the most common scenarios\r\n- [x] [Flaky
Test\r\nRunner](https://ci-stats.kibana.dev/trigger_flaky_test_runner/1)
was\r\nused on any tests
changed\r\n(https://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/7701)\r\n\r\n---------\r\n\r\nCo-authored-by:
Elastic Machine
<elasticmachine@users.noreply.github.com>","sha":"fd3b4ed590006212855c1a80994649eacbf41f31"}},"sourceBranch":"main","suggestedTargetBranches":[],"targetPullRequestStates":[{"branch":"main","label":"v9.0.0","branchLabelMappingKey":"^v9.0.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/206363","number":206363,"mergeCommit":{"message":"implements
backoff delay in usage counters service (#206363)\n\nfix
https://github.com/elastic/kibana/issues/192829\r\n\r\n##
Summary\r\n\r\nUsage counters lose counts when there's a version
conflict exception to\r\nincrement counters.\r\nATM, the service retries
`incrementCounter` immediately on error, which\r\ncould also fail if
another Kibana tries at the same time.\r\nUsing a backoff retry reduces
the likelihood of a version conflict\r\nexception.  \r\n\r\nThis PR
replaces the retry with an exponential `backoffDelay` that will\r\nwait
longer between retry attempts.\r\n\r\n\r\n- [x] [Unit or
functional\r\ntests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)\r\nwere
updated or added to match the most common scenarios\r\n- [x] [Flaky
Test\r\nRunner](https://ci-stats.kibana.dev/trigger_flaky_test_runner/1)
was\r\nused on any tests
changed\r\n(https://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/7701)\r\n\r\n---------\r\n\r\nCo-authored-by:
Elastic Machine
<elasticmachine@users.noreply.github.com>","sha":"fd3b4ed590006212855c1a80994649eacbf41f31"}}]}]
BACKPORT-->

Co-authored-by: Christiane (Tina) Heiligers <christiane.heiligers@elastic.co>
This commit is contained in:
Kibana Machine 2025-01-16 07:35:10 +11:00 committed by GitHub
parent 60ac68eecf
commit fdce5f85fb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 67 additions and 45 deletions

View file

@ -10,3 +10,4 @@
export const KIBANA_STATS_TYPE = 'kibana_stats';
export const DEFAULT_MAXIMUM_WAIT_TIME_FOR_ALL_COLLECTORS_IN_S = 1;
export const MAIN_APP_DEFAULT_VIEW_ID = 'main';
export const USAGE_COUNTERS_BACKOFF_DELAY_IN_MS = 500;

View file

@ -14,7 +14,7 @@ import { DEFAULT_MAXIMUM_WAIT_TIME_FOR_ALL_COLLECTORS_IN_S } from '../common/con
export const configSchema = schema.object({
usageCounters: schema.object({
enabled: schema.boolean({ defaultValue: true }),
retryCount: schema.number({ defaultValue: 1 }),
retryCount: schema.number({ defaultValue: 3 }),
bufferDuration: schema.duration({ defaultValue: '5s' }),
}),
uiCounters: schema.object({

View file

@ -58,7 +58,6 @@ export class UsageCounter implements IUsageCounter {
incrementBy = 1,
namespace,
} = params;
this.counter$.next({
domainId: this.domainId,
counterName,

View file

@ -25,9 +25,10 @@ const registerUsageCountersRollupsMock = registerUsageCountersRollups as jest.Mo
typeof registerUsageCountersRollups
>;
const tick = () => {
// optionally advance test timers after a delay
const tickWithDelay = (delay = 1) => {
jest.useRealTimers();
return new Promise((resolve) => setTimeout(resolve, 1));
return new Promise((resolve) => setTimeout(resolve, delay));
};
describe('UsageCountersService', () => {
@ -188,51 +189,62 @@ describe('UsageCountersService', () => {
]
`);
});
// requires extended test runtime because of exponential backoff delay for retries
it(
'retries errors by `retryCount` times before failing to store',
async () => {
const retryConst = 2;
const usageCountersService = new UsageCountersService({
logger,
retryCount: retryConst,
bufferDurationMs: 50000,
});
it('retries errors by `retryCount` times before failing to store', async () => {
const usageCountersService = new UsageCountersService({
logger,
retryCount: 1,
bufferDurationMs,
});
const mockRepository = coreStart.savedObjects.createInternalRepository();
const mockError = new Error('failed');
const mockIncrementCounter = jest.fn().mockImplementation((_, key) => {
switch (key) {
case 'test-counter:counterA:count:server:20210409':
throw mockError;
case 'test-counter:counterB:count:server:20210409':
return 'pass';
default:
throw new Error(`unknown key ${key}`);
}
});
const mockRepository = coreStart.savedObjects.createInternalRepository();
const mockError = new Error('failed.');
const mockIncrementCounter = jest.fn().mockImplementation((_, key) => {
switch (key) {
case 'test-counter:counterA:count:server:20210409':
throw mockError;
case 'test-counter:counterB:count:server:20210409':
return 'pass';
default:
throw new Error(`unknown key ${key}`);
}
});
mockRepository.incrementCounter = mockIncrementCounter;
mockRepository.incrementCounter = mockIncrementCounter;
coreStart.savedObjects.createInternalRepository.mockReturnValue(mockRepository);
const { createUsageCounter } = usageCountersService.setup(coreSetup);
jest.useFakeTimers();
const usageCounter = createUsageCounter('test-counter');
coreStart.savedObjects.createInternalRepository.mockReturnValue(mockRepository);
const { createUsageCounter } = usageCountersService.setup(coreSetup);
jest.useFakeTimers();
const usageCounter = createUsageCounter('test-counter');
usageCountersService.start(coreStart);
usageCounter.incrementCounter({ counterName: 'counterA' });
usageCounter.incrementCounter({ counterName: 'counterB' });
jest.runOnlyPendingTimers();
usageCountersService.start(coreStart);
usageCounter.incrementCounter({ counterName: 'counterA' });
usageCounter.incrementCounter({ counterName: 'counterB' });
jest.runOnlyPendingTimers();
// wait for retries to kick in on next scheduler call
await tick();
// number of incrementCounter calls + number of retries
expect(mockIncrementCounter).toBeCalledTimes(2 + 1);
expect(logger.debug).toHaveBeenNthCalledWith(1, 'Store counters into savedObjects', {
kibana: {
usageCounters: {
results: [mockError, 'pass'],
// wait for retries to kick in on next scheduler call
await tickWithDelay(5000);
// number of incrementCounter calls + number of retries
expect(mockIncrementCounter).toBeCalledTimes(2 + retryConst);
// assert counterA increment error warning logs
expect(logger.warn).toHaveBeenNthCalledWith(
2,
`${mockError}, retrying attempt ${retryConst}`
);
expect(logger.warn).toHaveBeenNthCalledWith(3, mockError);
expect(logger.debug).toHaveBeenNthCalledWith(1, 'Store counters into savedObjects', {
kibana: {
usageCounters: {
results: [mockError, 'pass'],
},
},
},
});
});
});
},
10 * 1000
);
it('buffers counters within `bufferDurationMs` time', async () => {
const usageCountersService = new UsageCountersService({
@ -264,7 +276,7 @@ describe('UsageCountersService', () => {
jest.runOnlyPendingTimers();
// wait for debounce to kick in on next scheduler call
await tick();
await tickWithDelay();
expect(mockIncrementCounter).toBeCalledTimes(2);
expect(mockIncrementCounter.mock.results.map(({ value }) => value)).toMatchInlineSnapshot(`
Array [

View file

@ -33,6 +33,7 @@ import {
} from './saved_objects';
import { registerUsageCountersRollups } from './rollups';
import { searchUsageCounters } from './search';
import { USAGE_COUNTERS_BACKOFF_DELAY_IN_MS } from '../../common/constants';
interface UsageCountersLogMeta extends LogMeta {
kibana: { usageCounters: { results: unknown[] } };
@ -159,6 +160,9 @@ export class UsageCountersService {
};
};
private backoffDelay = (attempt: number) =>
Math.pow(2, attempt) * USAGE_COUNTERS_BACKOFF_DELAY_IN_MS; // exponential backoff: 500ms, 1000ms, 2000ms
private storeDate$(
counters: UsageCounters.v1.CounterMetric[],
soRepository: Pick<SavedObjectsRepository, 'incrementCounter'>
@ -166,7 +170,13 @@ export class UsageCountersService {
return Rx.forkJoin(
counters.map((metric) =>
Rx.defer(() => storeCounter({ metric, soRepository })).pipe(
Rx.retry(this.retryCount),
Rx.retry({
count: this.retryCount,
delay: (error, retryIndex) => {
this.logger.warn(`Error: ${error.message}, retrying attempt ${retryIndex}`); // extra warning logger
return Rx.timer(this.backoffDelay(retryIndex));
},
}),
Rx.catchError((error) => {
this.logger.warn(error);
return Rx.of(error);