[8.x] Put the auto calculation of capacity behind a feature flag, for now (#195390) (#195486)

# Backport

This will backport the following commits from `main` to `8.x`:
- [Put the auto calculation of capacity behind a feature flag, for now
(#195390)](https://github.com/elastic/kibana/pull/195390)

<!--- Backport version: 9.4.3 -->

### Questions?
Please refer to the [Backport tool
documentation](https://github.com/sqren/backport)

<!--BACKPORT [{"author":{"name":"Mike
Côté","email":"mikecote@users.noreply.github.com"},"sourceCommit":{"committedDate":"2024-10-08T17:48:07Z","message":"Put
the auto calculation of capacity behind a feature flag, for now
(#195390)\n\nIn this PR, I'm preparing for the 8.16 release where we'd
like to start\r\nrolling out the `mget` task claiming strategy
separately from the added\r\nconcurrency. To accomplish this, we need to
put the capacity calculation\r\nbehind a feature flag that is default to
false for now, until we do a\r\nsecond rollout with an increased
concurrency. The increased concurrency\r\ncan be calculated and adjusted
based on experiments of clusters
setting\r\n`xpack.task_manager.capacity` to a higher value and observe
the resource\r\nusage.\r\n\r\nPR to deploy to Cloud and verify that we
always default to 10 normal\r\ntasks:
https://github.com/elastic/kibana/pull/195392","sha":"9c8f689aca23ed8b1f560c57a9a660d318375412","branchLabelMapping":{"^v9.0.0$":"main","^v8.16.0$":"8.x","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["release_note:skip","Feature:Task
Manager","Team:ResponseOps","v9.0.0","backport:prev-minor","v8.16.0"],"title":"Put
the auto calculation of capacity behind a feature flag, for
now","number":195390,"url":"https://github.com/elastic/kibana/pull/195390","mergeCommit":{"message":"Put
the auto calculation of capacity behind a feature flag, for now
(#195390)\n\nIn this PR, I'm preparing for the 8.16 release where we'd
like to start\r\nrolling out the `mget` task claiming strategy
separately from the added\r\nconcurrency. To accomplish this, we need to
put the capacity calculation\r\nbehind a feature flag that is default to
false for now, until we do a\r\nsecond rollout with an increased
concurrency. The increased concurrency\r\ncan be calculated and adjusted
based on experiments of clusters
setting\r\n`xpack.task_manager.capacity` to a higher value and observe
the resource\r\nusage.\r\n\r\nPR to deploy to Cloud and verify that we
always default to 10 normal\r\ntasks:
https://github.com/elastic/kibana/pull/195392","sha":"9c8f689aca23ed8b1f560c57a9a660d318375412"}},"sourceBranch":"main","suggestedTargetBranches":["8.x"],"targetPullRequestStates":[{"branch":"main","label":"v9.0.0","branchLabelMappingKey":"^v9.0.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/195390","number":195390,"mergeCommit":{"message":"Put
the auto calculation of capacity behind a feature flag, for now
(#195390)\n\nIn this PR, I'm preparing for the 8.16 release where we'd
like to start\r\nrolling out the `mget` task claiming strategy
separately from the added\r\nconcurrency. To accomplish this, we need to
put the capacity calculation\r\nbehind a feature flag that is default to
false for now, until we do a\r\nsecond rollout with an increased
concurrency. The increased concurrency\r\ncan be calculated and adjusted
based on experiments of clusters
setting\r\n`xpack.task_manager.capacity` to a higher value and observe
the resource\r\nusage.\r\n\r\nPR to deploy to Cloud and verify that we
always default to 10 normal\r\ntasks:
https://github.com/elastic/kibana/pull/195392","sha":"9c8f689aca23ed8b1f560c57a9a660d318375412"}},{"branch":"8.x","label":"v8.16.0","branchLabelMappingKey":"^v8.16.0$","isSourceBranch":false,"state":"NOT_CREATED"}]}]
BACKPORT-->

Co-authored-by: Mike Côté <mikecote@users.noreply.github.com>
This commit is contained in:
Kibana Machine 2024-10-09 06:44:32 +11:00 committed by GitHub
parent de0f3dc765
commit 59444d03d6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 89 additions and 3 deletions

View file

@ -416,6 +416,7 @@ kibana_vars=(
xpack.spaces.maxSpaces
xpack.task_manager.capacity
xpack.task_manager.claim_strategy
xpack.task_manager.auto_calculate_default_ech_capacity
xpack.task_manager.discovery.active_nodes_lookback
xpack.task_manager.discovery.interval
xpack.task_manager.kibanas_per_partition

View file

@ -13,6 +13,7 @@ describe('config validation', () => {
expect(configSchema.validate(config)).toMatchInlineSnapshot(`
Object {
"allow_reading_invalid_state": true,
"auto_calculate_default_ech_capacity": false,
"claim_strategy": "update_by_query",
"discovery": Object {
"active_nodes_lookback": "30s",
@ -75,6 +76,7 @@ describe('config validation', () => {
expect(configSchema.validate(config)).toMatchInlineSnapshot(`
Object {
"allow_reading_invalid_state": true,
"auto_calculate_default_ech_capacity": false,
"claim_strategy": "update_by_query",
"discovery": Object {
"active_nodes_lookback": "30s",
@ -135,6 +137,7 @@ describe('config validation', () => {
expect(configSchema.validate(config)).toMatchInlineSnapshot(`
Object {
"allow_reading_invalid_state": true,
"auto_calculate_default_ech_capacity": false,
"claim_strategy": "update_by_query",
"discovery": Object {
"active_nodes_lookback": "30s",

View file

@ -204,6 +204,7 @@ export const configSchema = schema.object(
}),
claim_strategy: schema.string({ defaultValue: CLAIM_STRATEGY_UPDATE_BY_QUERY }),
request_timeouts: requestTimeoutsConfig,
auto_calculate_default_ech_capacity: schema.boolean({ defaultValue: false }),
},
{
validate: (config) => {

View file

@ -88,6 +88,7 @@ describe('EphemeralTaskLifecycle', () => {
request_timeouts: {
update_by_query: 1000,
},
auto_calculate_default_ech_capacity: false,
...config,
},
elasticsearchAndSOAvailability$,

View file

@ -87,6 +87,7 @@ describe('managed configuration', () => {
request_timeouts: {
update_by_query: 1000,
},
auto_calculate_default_ech_capacity: false,
});
logger = context.logger.get('taskManager');
@ -209,6 +210,7 @@ describe('managed configuration', () => {
request_timeouts: {
update_by_query: 1000,
},
auto_calculate_default_ech_capacity: false,
});
logger = context.logger.get('taskManager');
@ -334,6 +336,7 @@ describe('managed configuration', () => {
request_timeouts: {
update_by_query: 1000,
},
auto_calculate_default_ech_capacity: false,
});
logger = context.logger.get('taskManager');

View file

@ -60,6 +60,7 @@ const config = {
request_timeouts: {
update_by_query: 1000,
},
auto_calculate_default_ech_capacity: false,
};
const getStatsWithTimestamp = ({

View file

@ -9,9 +9,10 @@ import { CLAIM_STRATEGY_UPDATE_BY_QUERY, CLAIM_STRATEGY_MGET, DEFAULT_CAPACITY }
import { getDefaultCapacity } from './get_default_capacity';
describe('getDefaultCapacity', () => {
it('returns default capacity when not in cloud', () => {
it('returns default capacity when autoCalculateDefaultEchCapacity=false', () => {
expect(
getDefaultCapacity({
autoCalculateDefaultEchCapacity: false,
heapSizeLimit: 851443712,
isCloud: false,
isServerless: false,
@ -22,6 +23,7 @@ describe('getDefaultCapacity', () => {
expect(
getDefaultCapacity({
autoCalculateDefaultEchCapacity: false,
heapSizeLimit: 851443712,
isCloud: false,
isServerless: true,
@ -32,6 +34,7 @@ describe('getDefaultCapacity', () => {
expect(
getDefaultCapacity({
autoCalculateDefaultEchCapacity: false,
heapSizeLimit: 851443712,
isCloud: false,
isServerless: false,
@ -42,6 +45,53 @@ describe('getDefaultCapacity', () => {
expect(
getDefaultCapacity({
autoCalculateDefaultEchCapacity: false,
heapSizeLimit: 851443712,
isCloud: false,
isServerless: true,
isBackgroundTaskNodeOnly: true,
claimStrategy: CLAIM_STRATEGY_MGET,
})
).toBe(DEFAULT_CAPACITY);
});
it('returns default capacity when not in cloud', () => {
expect(
getDefaultCapacity({
autoCalculateDefaultEchCapacity: true,
heapSizeLimit: 851443712,
isCloud: false,
isServerless: false,
isBackgroundTaskNodeOnly: false,
claimStrategy: CLAIM_STRATEGY_MGET,
})
).toBe(DEFAULT_CAPACITY);
expect(
getDefaultCapacity({
autoCalculateDefaultEchCapacity: true,
heapSizeLimit: 851443712,
isCloud: false,
isServerless: true,
isBackgroundTaskNodeOnly: false,
claimStrategy: CLAIM_STRATEGY_MGET,
})
).toBe(DEFAULT_CAPACITY);
expect(
getDefaultCapacity({
autoCalculateDefaultEchCapacity: true,
heapSizeLimit: 851443712,
isCloud: false,
isServerless: false,
isBackgroundTaskNodeOnly: true,
claimStrategy: CLAIM_STRATEGY_MGET,
})
).toBe(DEFAULT_CAPACITY);
expect(
getDefaultCapacity({
autoCalculateDefaultEchCapacity: true,
heapSizeLimit: 851443712,
isCloud: false,
isServerless: true,
@ -54,6 +104,7 @@ describe('getDefaultCapacity', () => {
it('returns default capacity when default claim strategy', () => {
expect(
getDefaultCapacity({
autoCalculateDefaultEchCapacity: true,
heapSizeLimit: 851443712,
isCloud: true,
isServerless: false,
@ -64,6 +115,7 @@ describe('getDefaultCapacity', () => {
expect(
getDefaultCapacity({
autoCalculateDefaultEchCapacity: true,
heapSizeLimit: 851443712,
isCloud: true,
isServerless: false,
@ -76,6 +128,7 @@ describe('getDefaultCapacity', () => {
it('returns default capacity when serverless', () => {
expect(
getDefaultCapacity({
autoCalculateDefaultEchCapacity: true,
heapSizeLimit: 851443712,
isCloud: false,
isServerless: true,
@ -86,6 +139,7 @@ describe('getDefaultCapacity', () => {
expect(
getDefaultCapacity({
autoCalculateDefaultEchCapacity: true,
heapSizeLimit: 851443712,
isCloud: false,
isServerless: true,
@ -96,6 +150,7 @@ describe('getDefaultCapacity', () => {
expect(
getDefaultCapacity({
autoCalculateDefaultEchCapacity: true,
heapSizeLimit: 851443712,
isCloud: true,
isServerless: true,
@ -106,6 +161,7 @@ describe('getDefaultCapacity', () => {
expect(
getDefaultCapacity({
autoCalculateDefaultEchCapacity: true,
heapSizeLimit: 851443712,
isCloud: true,
isServerless: true,
@ -119,6 +175,7 @@ describe('getDefaultCapacity', () => {
// 1GB
expect(
getDefaultCapacity({
autoCalculateDefaultEchCapacity: true,
heapSizeLimit: 851443712,
isCloud: true,
isServerless: false,
@ -130,6 +187,7 @@ describe('getDefaultCapacity', () => {
// 1GB but somehow background task node only is true
expect(
getDefaultCapacity({
autoCalculateDefaultEchCapacity: true,
heapSizeLimit: 851443712,
isCloud: true,
isServerless: false,
@ -141,6 +199,7 @@ describe('getDefaultCapacity', () => {
// 2GB
expect(
getDefaultCapacity({
autoCalculateDefaultEchCapacity: true,
heapSizeLimit: 1702887424,
isCloud: true,
isServerless: false,
@ -152,6 +211,7 @@ describe('getDefaultCapacity', () => {
// 2GB but somehow background task node only is true
expect(
getDefaultCapacity({
autoCalculateDefaultEchCapacity: true,
heapSizeLimit: 1702887424,
isCloud: true,
isServerless: false,
@ -163,6 +223,7 @@ describe('getDefaultCapacity', () => {
// 4GB
expect(
getDefaultCapacity({
autoCalculateDefaultEchCapacity: true,
heapSizeLimit: 3405774848,
isCloud: true,
isServerless: false,
@ -174,6 +235,7 @@ describe('getDefaultCapacity', () => {
// 4GB background task only
expect(
getDefaultCapacity({
autoCalculateDefaultEchCapacity: true,
heapSizeLimit: 3405774848,
isCloud: true,
isServerless: false,

View file

@ -8,6 +8,7 @@
import { CLAIM_STRATEGY_MGET, DEFAULT_CAPACITY } from '../config';
interface GetDefaultCapacityOpts {
autoCalculateDefaultEchCapacity: boolean;
claimStrategy?: string;
heapSizeLimit: number;
isCloud: boolean;
@ -24,6 +25,7 @@ const HEAP_TO_CAPACITY_MAP = [
];
export function getDefaultCapacity({
autoCalculateDefaultEchCapacity,
claimStrategy,
heapSizeLimit: heapSizeLimitInBytes,
isCloud,
@ -31,7 +33,12 @@ export function getDefaultCapacity({
isBackgroundTaskNodeOnly,
}: GetDefaultCapacityOpts) {
// perform heap size based calculations only when auto calculation is enabled, in cloud (not serverless), with the mget claim strategy
if (isCloud && !isServerless && claimStrategy === CLAIM_STRATEGY_MGET) {
if (
autoCalculateDefaultEchCapacity &&
isCloud &&
!isServerless &&
claimStrategy === CLAIM_STRATEGY_MGET
) {
// convert bytes to GB
const heapSizeLimitInGB = heapSizeLimitInBytes / 1e9;

View file

@ -78,6 +78,7 @@ const config: TaskManagerConfig = {
request_timeouts: {
update_by_query: 1000,
},
auto_calculate_default_ech_capacity: false,
};
describe('createAggregator', () => {

View file

@ -56,6 +56,7 @@ describe('Configuration Statistics Aggregator', () => {
request_timeouts: {
update_by_query: 1000,
},
auto_calculate_default_ech_capacity: false,
};
const managedConfig = {

View file

@ -87,6 +87,7 @@ const pluginInitializerContextParams = {
request_timeouts: {
update_by_query: 1000,
},
auto_calculate_default_ech_capacity: false,
};
describe('TaskManagerPlugin', () => {

View file

@ -286,6 +286,7 @@ export class TaskManagerPlugin
const isServerless = this.initContext.env.packageInfo.buildFlavor === 'serverless';
const defaultCapacity = getDefaultCapacity({
autoCalculateDefaultEchCapacity: this.config.auto_calculate_default_ech_capacity,
claimStrategy: this.config?.claim_strategy,
heapSizeLimit: this.heapSizeLimit,
isCloud: cloud?.isCloudEnabled ?? false,
@ -300,7 +301,9 @@ export class TaskManagerPlugin
this.config!.claim_strategy
} isBackgroundTaskNodeOnly=${this.isNodeBackgroundTasksOnly()} heapSizeLimit=${
this.heapSizeLimit
} defaultCapacity=${defaultCapacity}`
} defaultCapacity=${defaultCapacity} autoCalculateDefaultEchCapacity=${
this.config.auto_calculate_default_ech_capacity
}`
);
const managedConfiguration = createManagedConfiguration({

View file

@ -91,6 +91,7 @@ describe('TaskPollingLifecycle', () => {
request_timeouts: {
update_by_query: 1000,
},
auto_calculate_default_ech_capacity: false,
},
taskStore: mockTaskStore,
logger: taskManagerLogger,