mirror of
https://github.com/elastic/kibana.git
synced 2025-04-24 17:59:23 -04:00
# Backport

This will backport the following commits from `main` to `8.x`:
- [Put the auto calculation of capacity behind a feature flag, for now (#195390)](https://github.com/elastic/kibana/pull/195390)

<!--- Backport version: 9.4.3 -->

### Questions ?
Please refer to the [Backport tool documentation](https://github.com/sqren/backport)

<!--BACKPORT [{"author":{"name":"Mike Côté","email":"mikecote@users.noreply.github.com"},"sourceCommit":{"committedDate":"2024-10-08T17:48:07Z","message":"Put the auto calculation of capacity behind a feature flag, for now (#195390)\n\nIn this PR, I'm preparing for the 8.16 release where we'd like to start\r\nrolling out the `mget` task claiming strategy separately from the added\r\nconcurrency. To accomplish this, we need to put the capacity calculation\r\nbehind a feature flag that is default to false for now, until we do a\r\nsecond rollout with an increased concurrency. The increased concurrency\r\ncan be calculated and adjusted based on experiments of clusters setting\r\n`xpack.task_manager.capacity` to a higher value and observe the resource\r\nusage.\r\n\r\nPR to deploy to Cloud and verify that we always default to 10 normal\r\ntasks: https://github.com/elastic/kibana/pull/195392","sha":"9c8f689aca23ed8b1f560c57a9a660d318375412","branchLabelMapping":{"^v9.0.0$":"main","^v8.16.0$":"8.x","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["release_note:skip","Feature:Task Manager","Team:ResponseOps","v9.0.0","backport:prev-minor","v8.16.0"],"title":"Put the auto calculation of capacity behind a feature flag, for now","number":195390,"url":"https://github.com/elastic/kibana/pull/195390","mergeCommit":{"message":"Put the auto calculation of capacity behind a feature flag, for now (#195390)\n\nIn this PR, I'm preparing for the 8.16 release where we'd like to start\r\nrolling out the `mget` task claiming strategy separately from the added\r\nconcurrency. To accomplish this, we need to put the capacity calculation\r\nbehind a feature flag that is default to false for now, until we do a\r\nsecond rollout with an increased concurrency. The increased concurrency\r\ncan be calculated and adjusted based on experiments of clusters setting\r\n`xpack.task_manager.capacity` to a higher value and observe the resource\r\nusage.\r\n\r\nPR to deploy to Cloud and verify that we always default to 10 normal\r\ntasks: https://github.com/elastic/kibana/pull/195392","sha":"9c8f689aca23ed8b1f560c57a9a660d318375412"}},"sourceBranch":"main","suggestedTargetBranches":["8.x"],"targetPullRequestStates":[{"branch":"main","label":"v9.0.0","branchLabelMappingKey":"^v9.0.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/195390","number":195390,"mergeCommit":{"message":"Put the auto calculation of capacity behind a feature flag, for now (#195390)\n\nIn this PR, I'm preparing for the 8.16 release where we'd like to start\r\nrolling out the `mget` task claiming strategy separately from the added\r\nconcurrency. To accomplish this, we need to put the capacity calculation\r\nbehind a feature flag that is default to false for now, until we do a\r\nsecond rollout with an increased concurrency. The increased concurrency\r\ncan be calculated and adjusted based on experiments of clusters setting\r\n`xpack.task_manager.capacity` to a higher value and observe the resource\r\nusage.\r\n\r\nPR to deploy to Cloud and verify that we always default to 10 normal\r\ntasks: https://github.com/elastic/kibana/pull/195392","sha":"9c8f689aca23ed8b1f560c57a9a660d318375412"}},{"branch":"8.x","label":"v8.16.0","branchLabelMappingKey":"^v8.16.0$","isSourceBranch":false,"state":"NOT_CREATED"}]}] BACKPORT-->

Co-authored-by: Mike Côté <mikecote@users.noreply.github.com>
This commit is contained in:
parent
de0f3dc765
commit
59444d03d6
13 changed files with 89 additions and 3 deletions
|
@ -416,6 +416,7 @@ kibana_vars=(
|
|||
xpack.spaces.maxSpaces
|
||||
xpack.task_manager.capacity
|
||||
xpack.task_manager.claim_strategy
|
||||
xpack.task_manager.auto_calculate_default_ech_capacity
|
||||
xpack.task_manager.discovery.active_nodes_lookback
|
||||
xpack.task_manager.discovery.interval
|
||||
xpack.task_manager.kibanas_per_partition
|
||||
|
|
|
@ -13,6 +13,7 @@ describe('config validation', () => {
|
|||
expect(configSchema.validate(config)).toMatchInlineSnapshot(`
|
||||
Object {
|
||||
"allow_reading_invalid_state": true,
|
||||
"auto_calculate_default_ech_capacity": false,
|
||||
"claim_strategy": "update_by_query",
|
||||
"discovery": Object {
|
||||
"active_nodes_lookback": "30s",
|
||||
|
@ -75,6 +76,7 @@ describe('config validation', () => {
|
|||
expect(configSchema.validate(config)).toMatchInlineSnapshot(`
|
||||
Object {
|
||||
"allow_reading_invalid_state": true,
|
||||
"auto_calculate_default_ech_capacity": false,
|
||||
"claim_strategy": "update_by_query",
|
||||
"discovery": Object {
|
||||
"active_nodes_lookback": "30s",
|
||||
|
@ -135,6 +137,7 @@ describe('config validation', () => {
|
|||
expect(configSchema.validate(config)).toMatchInlineSnapshot(`
|
||||
Object {
|
||||
"allow_reading_invalid_state": true,
|
||||
"auto_calculate_default_ech_capacity": false,
|
||||
"claim_strategy": "update_by_query",
|
||||
"discovery": Object {
|
||||
"active_nodes_lookback": "30s",
|
||||
|
|
|
@ -204,6 +204,7 @@ export const configSchema = schema.object(
|
|||
}),
|
||||
claim_strategy: schema.string({ defaultValue: CLAIM_STRATEGY_UPDATE_BY_QUERY }),
|
||||
request_timeouts: requestTimeoutsConfig,
|
||||
auto_calculate_default_ech_capacity: schema.boolean({ defaultValue: false }),
|
||||
},
|
||||
{
|
||||
validate: (config) => {
|
||||
|
|
|
@ -88,6 +88,7 @@ describe('EphemeralTaskLifecycle', () => {
|
|||
request_timeouts: {
|
||||
update_by_query: 1000,
|
||||
},
|
||||
auto_calculate_default_ech_capacity: false,
|
||||
...config,
|
||||
},
|
||||
elasticsearchAndSOAvailability$,
|
||||
|
|
|
@ -87,6 +87,7 @@ describe('managed configuration', () => {
|
|||
request_timeouts: {
|
||||
update_by_query: 1000,
|
||||
},
|
||||
auto_calculate_default_ech_capacity: false,
|
||||
});
|
||||
logger = context.logger.get('taskManager');
|
||||
|
||||
|
@ -209,6 +210,7 @@ describe('managed configuration', () => {
|
|||
request_timeouts: {
|
||||
update_by_query: 1000,
|
||||
},
|
||||
auto_calculate_default_ech_capacity: false,
|
||||
});
|
||||
logger = context.logger.get('taskManager');
|
||||
|
||||
|
@ -334,6 +336,7 @@ describe('managed configuration', () => {
|
|||
request_timeouts: {
|
||||
update_by_query: 1000,
|
||||
},
|
||||
auto_calculate_default_ech_capacity: false,
|
||||
});
|
||||
logger = context.logger.get('taskManager');
|
||||
|
||||
|
|
|
@ -60,6 +60,7 @@ const config = {
|
|||
request_timeouts: {
|
||||
update_by_query: 1000,
|
||||
},
|
||||
auto_calculate_default_ech_capacity: false,
|
||||
};
|
||||
|
||||
const getStatsWithTimestamp = ({
|
||||
|
|
|
@ -9,9 +9,10 @@ import { CLAIM_STRATEGY_UPDATE_BY_QUERY, CLAIM_STRATEGY_MGET, DEFAULT_CAPACITY }
|
|||
import { getDefaultCapacity } from './get_default_capacity';
|
||||
|
||||
describe('getDefaultCapacity', () => {
|
||||
it('returns default capacity when not in cloud', () => {
|
||||
it('returns default capacity when autoCalculateDefaultEchCapacity=false', () => {
|
||||
expect(
|
||||
getDefaultCapacity({
|
||||
autoCalculateDefaultEchCapacity: false,
|
||||
heapSizeLimit: 851443712,
|
||||
isCloud: false,
|
||||
isServerless: false,
|
||||
|
@ -22,6 +23,7 @@ describe('getDefaultCapacity', () => {
|
|||
|
||||
expect(
|
||||
getDefaultCapacity({
|
||||
autoCalculateDefaultEchCapacity: false,
|
||||
heapSizeLimit: 851443712,
|
||||
isCloud: false,
|
||||
isServerless: true,
|
||||
|
@ -32,6 +34,7 @@ describe('getDefaultCapacity', () => {
|
|||
|
||||
expect(
|
||||
getDefaultCapacity({
|
||||
autoCalculateDefaultEchCapacity: false,
|
||||
heapSizeLimit: 851443712,
|
||||
isCloud: false,
|
||||
isServerless: false,
|
||||
|
@ -42,6 +45,53 @@ describe('getDefaultCapacity', () => {
|
|||
|
||||
expect(
|
||||
getDefaultCapacity({
|
||||
autoCalculateDefaultEchCapacity: false,
|
||||
heapSizeLimit: 851443712,
|
||||
isCloud: false,
|
||||
isServerless: true,
|
||||
isBackgroundTaskNodeOnly: true,
|
||||
claimStrategy: CLAIM_STRATEGY_MGET,
|
||||
})
|
||||
).toBe(DEFAULT_CAPACITY);
|
||||
});
|
||||
|
||||
it('returns default capacity when not in cloud', () => {
|
||||
expect(
|
||||
getDefaultCapacity({
|
||||
autoCalculateDefaultEchCapacity: true,
|
||||
heapSizeLimit: 851443712,
|
||||
isCloud: false,
|
||||
isServerless: false,
|
||||
isBackgroundTaskNodeOnly: false,
|
||||
claimStrategy: CLAIM_STRATEGY_MGET,
|
||||
})
|
||||
).toBe(DEFAULT_CAPACITY);
|
||||
|
||||
expect(
|
||||
getDefaultCapacity({
|
||||
autoCalculateDefaultEchCapacity: true,
|
||||
heapSizeLimit: 851443712,
|
||||
isCloud: false,
|
||||
isServerless: true,
|
||||
isBackgroundTaskNodeOnly: false,
|
||||
claimStrategy: CLAIM_STRATEGY_MGET,
|
||||
})
|
||||
).toBe(DEFAULT_CAPACITY);
|
||||
|
||||
expect(
|
||||
getDefaultCapacity({
|
||||
autoCalculateDefaultEchCapacity: true,
|
||||
heapSizeLimit: 851443712,
|
||||
isCloud: false,
|
||||
isServerless: false,
|
||||
isBackgroundTaskNodeOnly: true,
|
||||
claimStrategy: CLAIM_STRATEGY_MGET,
|
||||
})
|
||||
).toBe(DEFAULT_CAPACITY);
|
||||
|
||||
expect(
|
||||
getDefaultCapacity({
|
||||
autoCalculateDefaultEchCapacity: true,
|
||||
heapSizeLimit: 851443712,
|
||||
isCloud: false,
|
||||
isServerless: true,
|
||||
|
@ -54,6 +104,7 @@ describe('getDefaultCapacity', () => {
|
|||
it('returns default capacity when default claim strategy', () => {
|
||||
expect(
|
||||
getDefaultCapacity({
|
||||
autoCalculateDefaultEchCapacity: true,
|
||||
heapSizeLimit: 851443712,
|
||||
isCloud: true,
|
||||
isServerless: false,
|
||||
|
@ -64,6 +115,7 @@ describe('getDefaultCapacity', () => {
|
|||
|
||||
expect(
|
||||
getDefaultCapacity({
|
||||
autoCalculateDefaultEchCapacity: true,
|
||||
heapSizeLimit: 851443712,
|
||||
isCloud: true,
|
||||
isServerless: false,
|
||||
|
@ -76,6 +128,7 @@ describe('getDefaultCapacity', () => {
|
|||
it('returns default capacity when serverless', () => {
|
||||
expect(
|
||||
getDefaultCapacity({
|
||||
autoCalculateDefaultEchCapacity: true,
|
||||
heapSizeLimit: 851443712,
|
||||
isCloud: false,
|
||||
isServerless: true,
|
||||
|
@ -86,6 +139,7 @@ describe('getDefaultCapacity', () => {
|
|||
|
||||
expect(
|
||||
getDefaultCapacity({
|
||||
autoCalculateDefaultEchCapacity: true,
|
||||
heapSizeLimit: 851443712,
|
||||
isCloud: false,
|
||||
isServerless: true,
|
||||
|
@ -96,6 +150,7 @@ describe('getDefaultCapacity', () => {
|
|||
|
||||
expect(
|
||||
getDefaultCapacity({
|
||||
autoCalculateDefaultEchCapacity: true,
|
||||
heapSizeLimit: 851443712,
|
||||
isCloud: true,
|
||||
isServerless: true,
|
||||
|
@ -106,6 +161,7 @@ describe('getDefaultCapacity', () => {
|
|||
|
||||
expect(
|
||||
getDefaultCapacity({
|
||||
autoCalculateDefaultEchCapacity: true,
|
||||
heapSizeLimit: 851443712,
|
||||
isCloud: true,
|
||||
isServerless: true,
|
||||
|
@ -119,6 +175,7 @@ describe('getDefaultCapacity', () => {
|
|||
// 1GB
|
||||
expect(
|
||||
getDefaultCapacity({
|
||||
autoCalculateDefaultEchCapacity: true,
|
||||
heapSizeLimit: 851443712,
|
||||
isCloud: true,
|
||||
isServerless: false,
|
||||
|
@ -130,6 +187,7 @@ describe('getDefaultCapacity', () => {
|
|||
// 1GB but somehow background task node only is true
|
||||
expect(
|
||||
getDefaultCapacity({
|
||||
autoCalculateDefaultEchCapacity: true,
|
||||
heapSizeLimit: 851443712,
|
||||
isCloud: true,
|
||||
isServerless: false,
|
||||
|
@ -141,6 +199,7 @@ describe('getDefaultCapacity', () => {
|
|||
// 2GB
|
||||
expect(
|
||||
getDefaultCapacity({
|
||||
autoCalculateDefaultEchCapacity: true,
|
||||
heapSizeLimit: 1702887424,
|
||||
isCloud: true,
|
||||
isServerless: false,
|
||||
|
@ -152,6 +211,7 @@ describe('getDefaultCapacity', () => {
|
|||
// 2GB but somehow background task node only is true
|
||||
expect(
|
||||
getDefaultCapacity({
|
||||
autoCalculateDefaultEchCapacity: true,
|
||||
heapSizeLimit: 1702887424,
|
||||
isCloud: true,
|
||||
isServerless: false,
|
||||
|
@ -163,6 +223,7 @@ describe('getDefaultCapacity', () => {
|
|||
// 4GB
|
||||
expect(
|
||||
getDefaultCapacity({
|
||||
autoCalculateDefaultEchCapacity: true,
|
||||
heapSizeLimit: 3405774848,
|
||||
isCloud: true,
|
||||
isServerless: false,
|
||||
|
@ -174,6 +235,7 @@ describe('getDefaultCapacity', () => {
|
|||
// 4GB background task only
|
||||
expect(
|
||||
getDefaultCapacity({
|
||||
autoCalculateDefaultEchCapacity: true,
|
||||
heapSizeLimit: 3405774848,
|
||||
isCloud: true,
|
||||
isServerless: false,
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
import { CLAIM_STRATEGY_MGET, DEFAULT_CAPACITY } from '../config';
|
||||
|
||||
interface GetDefaultCapacityOpts {
|
||||
autoCalculateDefaultEchCapacity: boolean;
|
||||
claimStrategy?: string;
|
||||
heapSizeLimit: number;
|
||||
isCloud: boolean;
|
||||
|
@ -24,6 +25,7 @@ const HEAP_TO_CAPACITY_MAP = [
|
|||
];
|
||||
|
||||
export function getDefaultCapacity({
|
||||
autoCalculateDefaultEchCapacity,
|
||||
claimStrategy,
|
||||
heapSizeLimit: heapSizeLimitInBytes,
|
||||
isCloud,
|
||||
|
@ -31,7 +33,12 @@ export function getDefaultCapacity({
|
|||
isBackgroundTaskNodeOnly,
|
||||
}: GetDefaultCapacityOpts) {
|
||||
// perform heap size based calculations only in cloud
|
||||
if (isCloud && !isServerless && claimStrategy === CLAIM_STRATEGY_MGET) {
|
||||
if (
|
||||
autoCalculateDefaultEchCapacity &&
|
||||
isCloud &&
|
||||
!isServerless &&
|
||||
claimStrategy === CLAIM_STRATEGY_MGET
|
||||
) {
|
||||
// convert bytes to GB
|
||||
const heapSizeLimitInGB = heapSizeLimitInBytes / 1e9;
|
||||
|
||||
|
|
|
@ -78,6 +78,7 @@ const config: TaskManagerConfig = {
|
|||
request_timeouts: {
|
||||
update_by_query: 1000,
|
||||
},
|
||||
auto_calculate_default_ech_capacity: false,
|
||||
};
|
||||
|
||||
describe('createAggregator', () => {
|
||||
|
|
|
@ -56,6 +56,7 @@ describe('Configuration Statistics Aggregator', () => {
|
|||
request_timeouts: {
|
||||
update_by_query: 1000,
|
||||
},
|
||||
auto_calculate_default_ech_capacity: false,
|
||||
};
|
||||
|
||||
const managedConfig = {
|
||||
|
|
|
@ -87,6 +87,7 @@ const pluginInitializerContextParams = {
|
|||
request_timeouts: {
|
||||
update_by_query: 1000,
|
||||
},
|
||||
auto_calculate_default_ech_capacity: false,
|
||||
};
|
||||
|
||||
describe('TaskManagerPlugin', () => {
|
||||
|
|
|
@ -286,6 +286,7 @@ export class TaskManagerPlugin
|
|||
const isServerless = this.initContext.env.packageInfo.buildFlavor === 'serverless';
|
||||
|
||||
const defaultCapacity = getDefaultCapacity({
|
||||
autoCalculateDefaultEchCapacity: this.config.auto_calculate_default_ech_capacity,
|
||||
claimStrategy: this.config?.claim_strategy,
|
||||
heapSizeLimit: this.heapSizeLimit,
|
||||
isCloud: cloud?.isCloudEnabled ?? false,
|
||||
|
@ -300,7 +301,9 @@ export class TaskManagerPlugin
|
|||
this.config!.claim_strategy
|
||||
} isBackgroundTaskNodeOnly=${this.isNodeBackgroundTasksOnly()} heapSizeLimit=${
|
||||
this.heapSizeLimit
|
||||
} defaultCapacity=${defaultCapacity}`
|
||||
} defaultCapacity=${defaultCapacity} autoCalculateDefaultEchCapacity=${
|
||||
this.config.auto_calculate_default_ech_capacity
|
||||
}`
|
||||
);
|
||||
|
||||
const managedConfiguration = createManagedConfiguration({
|
||||
|
|
|
@ -91,6 +91,7 @@ describe('TaskPollingLifecycle', () => {
|
|||
request_timeouts: {
|
||||
update_by_query: 1000,
|
||||
},
|
||||
auto_calculate_default_ech_capacity: false,
|
||||
},
|
||||
taskStore: mockTaskStore,
|
||||
logger: taskManagerLogger,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue