mirror of
https://github.com/elastic/kibana.git
synced 2025-04-23 09:19:04 -04:00
# Backport

This will backport the following commits from `main` to `8.x`:

- [[ML] Redesign start/update model deployment dialog (#190243)](https://github.com/elastic/kibana/pull/190243)

### Questions?

Please refer to the [Backport tool documentation](https://github.com/sqren/backport).

## Summary

#### Notes for reviewers

https://github.com/elastic/elasticsearch/pull/113455 has to be merged first to support `min_number_of_allocations: 0`. At the moment it is not possible to start a `Low` vCPUs usage deployment from the UI.

Resolves #189975

- Updates the start/update model deployment dialog, replacing the allocation and threading parameters with use cases, and the advanced settings with vCPU/VCU controls.

The vCPUs (or VCUs in a serverless environment) are derived from the number of allocations multiplied by the threads per allocation.

_Optimised for ingest_ sets 1 thread per allocation. _Optimised for search_ sets 2 threads per allocation for the low vCPU level, and the maximum available number of threads for medium and high. This value is limited by `max_single_ml_node_processors`.

The vCPU control acts differently depending on "Adaptive resources":

- When on, it enables `adaptive_allocations` and sets the `min_number_of_allocations` and `max_number_of_allocations` range based on the use case (threads per allocation) and the cluster configuration.
  - For cloud deployments with ML autoscaling enabled, the vCPU ranges are 1-2, 3-32, and 33-max.
  - For on-prem deployments and cloud deployments with autoscaling disabled, the vCPU ranges are based on the hardware limitations. For example, with `total_ml_processors` = 32 and `max_single_ml_node_processors` = 16, the ranges are 1-2, 3-16, and 17-32.
- When off, it sets a static `number_of_allocations`, equal to the upper bound of the corresponding range above.

For serverless deployments, adaptive resources are enabled at all times and the control is hidden.

<img width="795" alt="image" src="https://github.com/user-attachments/assets/20b2528f-b631-49f9-82f8-fef6175873fd">

- The start deployment modal checks whether a deployment optimized for a certain use case already exists, and sets another use case by default. For example, if a deployment optimized for ingestion is found, the modal opens with search selected as the default.

- For cloud deployments, it also displays a message about vCPU values in the Cloud Console and a link to edit the deployment:

<img width="791" alt="image" src="https://github.com/user-attachments/assets/2f98ebca-579e-43c1-ab78-e0dd38ce4786">

- For serverless, the "Adaptive resources" control is hidden, as adaptive allocations are enabled at all times.

<img width="659" alt="image" src="https://github.com/user-attachments/assets/8133ebf9-6b2b-4fea-95f1-7351cfcf85b6">

- The update action was updated accordingly. As it is not possible to update threads per allocation, the optimize control is disabled. Advanced settings are expanded by default and allow the user to adjust the vCPU level and enable or disable adaptive resources.

<img width="785" alt="image" src="https://github.com/user-attachments/assets/13c3f0bc-4436-4528-8641-d33eb5384ea2">

- Indicates whether adaptive allocations are enabled in the expanded row.

<img width="1322" alt="image" src="https://github.com/user-attachments/assets/894916df-4c77-4e75-b175-229131b8ecc8">

### Checklist

- [x] Any text added follows [EUI's writing guidelines](https://elastic.github.io/eui/#/guidelines/writing), uses sentence case text and includes [i18n support](https://github.com/elastic/kibana/blob/main/packages/kbn-i18n/README.md)
- [ ] [Documentation](https://www.elastic.co/guide/en/kibana/master/development-documentation.html) was added for features that require explanation or tutorials
- [x] [Unit or functional tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html) were updated or added to match the most common scenarios
- [ ] [Flaky Test Runner](https://ci-stats.kibana.dev/trigger_flaky_test_runner/1) was used on any tests changed
- [x] Any UI touched in this PR is usable by keyboard only (learn more about [keyboard accessibility](https://webaim.org/techniques/keyboard/))
- [x] Any UI touched in this PR does not create any new axe failures (run axe in browser: [FF](https://addons.mozilla.org/en-US/firefox/addon/axe-devtools/), [Chrome](https://chrome.google.com/webstore/detail/axe-web-accessibility-tes/lhdoppojpmngadmnindnejefpokejbdd?hl=en-US))
- [x] This renders correctly on smaller devices using a responsive layout. (You can test this [in your browser](https://www.browserstack.com/guide/responsive-testing-on-local-server))
- [ ] This was checked for [cross-browser compatibility](https://www.elastic.co/support/matrix#matrix_browsers)

Co-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com>
Co-authored-by: István Zoltán Szabó <istvan.szabo@elastic.co>
Co-authored-by: Dima Arnautov <dmitrii.arnautov@elastic.co>
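The allocation arithmetic described above can be sketched as follows. This is an illustrative helper, not the PR's actual code; all names here are made up:

```typescript
// Sketch of the vCPU derivation described above: vCPUs = allocations × threads.
// Hypothetical names; the real mapper lives in DeploymentParamsMapper.
type UseCase = 'optimizedForIngest' | 'optimizedForSearch';
type VCpuLevel = 'low' | 'medium' | 'high';

// Per the description: ingest always uses 1 thread per allocation; search uses
// 2 for the "low" level and the maximum available number of threads (capped by
// max_single_ml_node_processors) for "medium" and "high".
function threadsPerAllocation(
  useCase: UseCase,
  level: VCpuLevel,
  maxSingleNodeProcessors: number
): number {
  if (useCase === 'optimizedForIngest') return 1;
  return level === 'low' ? 2 : maxSingleNodeProcessors;
}

// vCPUs (or VCUs on serverless) = number of allocations × threads per allocation.
function vCpus(numberOfAllocations: number, threads: number): number {
  return numberOfAllocations * threads;
}
```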
Commit e72fd8f18a (parent ef7048f61a)
25 changed files with 1757 additions and 502 deletions
```diff
@@ -8,6 +8,16 @@
 import { numberValidator } from './validate_number';

 describe('numberValidator', () => {
+  it('should allow an empty value if not required', () => {
+    const validator = numberValidator({ min: 1, integerOnly: true, required: false });
+    expect(validator(undefined)).toStrictEqual(null);
+  });
+
+  it('should not allow an empty value if required', () => {
+    const validator = numberValidator({ min: 1, integerOnly: true, required: true });
+    expect(validator(undefined)).toStrictEqual({ required: true });
+  });
+
   it('should only allow integers above zero', () => {
     const integerOnlyValidator = numberValidator({ min: 1, integerOnly: true });
     // invalid
```
```diff
@@ -12,16 +12,16 @@ import { isPopulatedObject } from '@kbn/ml-is-populated-object';
  * Represents the result of number validation.
  * @interface
  */
-export interface NumberValidationResult {
+export type NumberValidationResult = { [key: string]: boolean } & {
   /** The minimum allowed value. */
-  min: boolean;
+  min?: boolean;

   /** The maximum allowed value. */
-  max: boolean;
+  max?: boolean;

   /** Boolean flag to allow integer values only. */
-  integerOnly: boolean;
-}
+  integerOnly?: boolean;
+};

 /**
  * An interface describing conditions for validating numbers.
@@ -42,6 +42,8 @@ interface NumberValidatorConditions {
  * Indicates whether only integer values are valid.
  */
  integerOnly?: boolean;
+
+  required?: boolean;
 }

 /**
@@ -60,8 +62,18 @@ export function numberValidator(conditions?: NumberValidatorConditions) {
     throw new Error('Invalid validator conditions');
   }

-  return memoize((value: number): NumberValidationResult | null => {
+  return memoize((value: number | undefined): NumberValidationResult | null => {
     const result = {} as NumberValidationResult;

+    if (value === undefined) {
+      if (conditions?.required) {
+        result.required = true;
+        return result;
+      } else {
+        return null;
+      }
+    }
+
     if (conditions?.min !== undefined && value < conditions.min) {
       result.min = true;
     }
```
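The behavior added in this hunk can be illustrated with a standalone sketch. This is a simplified reimplementation for illustration only; the real `numberValidator` in `./validate_number` also memoizes and validates its conditions:

```typescript
// Simplified, standalone sketch of the validation rules shown in the diff above.
interface Conditions {
  min?: number;
  max?: number;
  integerOnly?: boolean;
  required?: boolean;
}
type Result = { [key: string]: boolean };

function validate(conditions: Conditions) {
  return (value: number | undefined): Result | null => {
    const result: Result = {};
    // New behavior: undefined is only an error when the value is required.
    if (value === undefined) {
      return conditions.required ? { required: true } : null;
    }
    if (conditions.min !== undefined && value < conditions.min) result.min = true;
    if (conditions.max !== undefined && value > conditions.max) result.max = true;
    if (conditions.integerOnly && !Number.isInteger(value)) result.integerOnly = true;
    return Object.keys(result).length ? result : null;
  };
}
```

Usage mirrors the new unit tests: an optional field accepts `undefined`, a required one reports `{ required: true }`.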
```diff
@@ -5,7 +5,7 @@
  * 2.0.
  */
 import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
-import type { DeploymentState, TrainedModelType } from '@kbn/ml-trained-models-utils';
+import type { TrainedModelType } from '@kbn/ml-trained-models-utils';
 import type {
   DataFrameAnalyticsConfig,
   FeatureImportanceBaseline,
```
```diff
@@ -141,48 +141,46 @@ export interface InferenceConfigResponse {
   trained_model_configs: TrainedModelConfigResponse[];
 }

-export interface TrainedModelDeploymentStatsResponse {
-  model_id: string;
-  deployment_id: string;
-  inference_threads: number;
-  model_threads: number;
-  state: DeploymentState;
-  allocation_status: { target_allocation_count: number; state: string; allocation_count: number };
-  nodes: Array<{
-    node: Record<
-      string,
-      {
-        transport_address: string;
-        roles: string[];
-        name: string;
-        attributes: {
-          'ml.machine_memory': string;
-          'xpack.installed': string;
-          'ml.max_open_jobs': string;
-          'ml.max_jvm_size': string;
-        };
-        ephemeral_id: string;
-      }
-    >;
-    inference_count: number;
-    routing_state: { routing_state: string };
-    average_inference_time_ms: number;
-    last_access: number;
-    number_of_pending_requests: number;
-    start_time: number;
-    throughput_last_minute: number;
-    threads_per_allocation: number;
-    number_of_allocations: number;
-  }>;
-  reason?: string;
-}
+type NodesDeploymentStats = Array<{
+  node: Record<
+    string,
+    {
+      transport_address: string;
+      roles: string[];
+      name: string;
+      attributes: {
+        'ml.machine_memory': string;
+        'xpack.installed': string;
+        'ml.max_open_jobs': string;
+        'ml.max_jvm_size': string;
+      };
+      ephemeral_id: string;
+    }
+  >;
+  inference_count: number;
+  routing_state: { routing_state: string };
+  average_inference_time_ms: number;
+  last_access: number;
+  number_of_pending_requests: number;
+  start_time: number;
+  throughput_last_minute: number;
+  threads_per_allocation: number;
+  number_of_allocations: number;
+}>;
+
+export type TrainedModelDeploymentStatsResponse = estypes.MlTrainedModelDeploymentStats & {
+  nodes: NodesDeploymentStats;
+  // TODO update types in elasticsearch-specification
+  adaptive_allocations?: {
+    enabled: boolean;
+    min_number_of_allocations?: number;
+    max_number_of_allocations?: number;
+  };
+};

 export interface AllocatedModel {
   key: string;
   deployment_id: string;
   inference_threads: number;
   allocation_status: {
     target_allocation_count: number;
     state: string;
@@ -195,7 +193,6 @@ export interface AllocatedModel {
   */
   model_id?: string;
   state: string;
-  model_threads: number;
   model_size_bytes: number;
   required_native_memory_bytes: number;
   node: {
@@ -217,6 +214,11 @@ export interface AllocatedModel {
     threads_per_allocation?: number;
     error_count?: number;
   };
+  adaptive_allocations?: {
+    enabled: boolean;
+    min_number_of_allocations?: number;
+    max_number_of_allocations?: number;
+  };
 }

 export interface NodeDeploymentStatsResponse {
```
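For illustration, this is what the optional `adaptive_allocations` shape added above looks like when populated. The values below are examples only, not defaults from the PR:

```typescript
// Local copy of the adaptive_allocations shape added in the diff above.
interface AdaptiveAllocations {
  enabled: boolean;
  min_number_of_allocations?: number;
  max_number_of_allocations?: number;
}

// Example: adaptive scaling between 1 and 4 allocations (illustrative values).
const adaptiveAllocations: AdaptiveAllocations = {
  enabled: true,
  min_number_of_allocations: 1,
  max_number_of_allocations: 4,
};
```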
```diff
@@ -71,6 +71,8 @@ const MLJobsAwaitingNodeWarning: FC<Props> = ({ jobIds }) => {
         cloudId,
+        isCloudTrial,
         deploymentId: cloudId === null ? null : extractDeploymentId(cloudId),
+        isMlAutoscalingEnabled: resp.isMlAutoscalingEnabled,
         cloudUrl: resp.cloudUrl ?? null,
       });
     } catch (error) {
       setCloudInfo(null);
```
```diff
@@ -70,6 +70,8 @@ const defaultCloudInfo: CloudInfo = {
   isCloud: false,
+  isCloudTrial: false,
   deploymentId: null,
   cloudUrl: null,
+  isMlAutoscalingEnabled: false,
 };

 export function useCloudCheck() {
@@ -85,6 +87,8 @@ export function useCloudCheck() {
       isCloud: resp.cloudId !== undefined,
+      isCloudTrial: resp.isCloudTrial === true,
       deploymentId: !resp.cloudId ? null : extractDeploymentId(resp.cloudId),
       cloudUrl: resp.cloudUrl ?? null,
+      isMlAutoscalingEnabled: resp.isMlAutoscalingEnabled,
     });
   } catch (error) {
     if (error.statusCode === 403) {
```
```diff
@@ -126,7 +126,30 @@ export const AllocatedModels: FC<AllocatedModelsProps> = ({
         ) {
           return '-';
         }
-        return `${v.node.number_of_allocations} * ${v.node.threads_per_allocation}`;
+
+        let adaptiveAllocations = null;
+        if (v.adaptive_allocations?.enabled) {
+          adaptiveAllocations = (
+            <EuiToolTip
+              content={i18n.translate(
+                'xpack.ml.trainedModels.nodesList.modelsList.adaptiveAllocationsTooltip',
+                {
+                  defaultMessage: 'Adaptive allocations enabled',
+                }
+              )}
+            >
+              <EuiIcon size="l" color="warning" type="scale" />
+            </EuiToolTip>
+          );
+        }
+        return (
+          <>
+            <>
+              {v.node.number_of_allocations} * {v.node.threads_per_allocation}
+            </>
+            {adaptiveAllocations}
+          </>
+        );
       },
     },
     {
```
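Outside of React, the cell logic above reduces to: render `allocations * threads`, and append a warning icon when adaptive allocations are enabled. A hypothetical plain-function version of just the label part:

```typescript
// Hypothetical, non-React version of the cell's label logic shown above.
function allocationLabel(node: {
  number_of_allocations?: number;
  threads_per_allocation?: number;
}): string {
  // Mirror the early return: show a dash when either value is missing.
  if (node.number_of_allocations === undefined || node.threads_per_allocation === undefined) {
    return '-';
  }
  return `${node.number_of_allocations} * ${node.threads_per_allocation}`;
}
```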
```diff
@@ -0,0 +1,573 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import type { MlTrainedModelAssignmentTaskParametersAdaptive } from './deployment_params_mapper';
+import { DeploymentParamsMapper } from './deployment_params_mapper';
+import type { CloudInfo } from '../services/ml_server_info';
+import type { MlServerLimits } from '../../../common/types/ml_server_info';
+
+describe('DeploymentParamsMapper', () => {
+  const modelId = 'test-model';
+
+  const mlServerLimits: MlServerLimits = {
+    max_single_ml_node_processors: 10,
+    total_ml_processors: 10,
+  };
+
+  const cloudInfo = {
+    isMlAutoscalingEnabled: false,
+  } as CloudInfo;
+
+  let mapper: DeploymentParamsMapper;
+
+  mapper = new DeploymentParamsMapper(modelId, mlServerLimits, cloudInfo, true);
+
+  describe('running in serverless', () => {
+    beforeEach(() => {
+      mapper = new DeploymentParamsMapper(
+        modelId,
+        {
+          max_single_ml_node_processors: 16,
+          total_ml_processors: 32,
+        },
+        {
+          isMlAutoscalingEnabled: false,
+        } as CloudInfo,
+        false
+      );
+    });
+
+    it('should get correct VCU levels', () => {
+      expect(mapper.getVCURange('low')).toEqual({
+        min: 8,
+        max: 16,
+        static: 16,
+      });
+      expect(mapper.getVCURange('medium')).toEqual({
+        min: 24,
+        max: 256,
+        static: 256,
+      });
+      expect(mapper.getVCURange('high')).toEqual({
+        min: 264,
+        max: 4000,
+        static: 800,
+      });
+    });
+
+    it('should enforce adaptive allocations', () => {
+      expect(
+        mapper.mapUiToApiDeploymentParams({
+          deploymentId: 'test-deployment',
+          optimized: 'optimizedForSearch',
+          adaptiveResources: false,
+          vCPUUsage: 'low',
+        })
+      ).toEqual({
+        adaptive_allocations: {
+          enabled: true,
+          max_number_of_allocations: 1,
+          min_number_of_allocations: 1,
+        },
+        deployment_id: 'test-deployment',
+        model_id: 'test-model',
+        priority: 'normal',
+        threads_per_allocation: 2,
+      });
+
+      expect(
+        mapper.mapUiToApiDeploymentParams({
+          deploymentId: 'test-deployment',
+          optimized: 'optimizedForIngest',
+          adaptiveResources: false,
+          vCPUUsage: 'low',
+        })
+      ).toEqual({
+        adaptive_allocations: {
+          enabled: true,
+          max_number_of_allocations: 2,
+          min_number_of_allocations: 1,
+        },
+        deployment_id: 'test-deployment',
+        model_id: 'test-model',
+        priority: 'normal',
+        threads_per_allocation: 1,
+      });
+    });
+  });
+
+  describe('32 cores, 16 single', () => {
+    beforeEach(() => {
+      mapper = new DeploymentParamsMapper(
+        modelId,
+        {
+          max_single_ml_node_processors: 16,
+          total_ml_processors: 32,
+        },
+        {
+          isMlAutoscalingEnabled: false,
+        } as CloudInfo,
+        true
+      );
+    });
+
+    it('should provide vCPU level', () => {
+      expect(mapper.getVCPURange('low')).toEqual({ min: 1, max: 2, static: 2 });
+      expect(mapper.getVCPURange('medium')).toEqual({ min: 3, max: 16, static: 16 });
+      expect(mapper.getVCPURange('high')).toEqual({ min: 17, max: 32, static: 32 });
+    });
+  });
+
+  describe('when autoscaling is disabled', () => {
+    beforeEach(() => {
+      mapper = new DeploymentParamsMapper(
+        modelId,
+        mlServerLimits,
+        {
+          isMlAutoscalingEnabled: false,
+        } as CloudInfo,
+        true
+      );
+    });
+
+    it('should map UI params to API request correctly', () => {
+      expect(
+        mapper.mapUiToApiDeploymentParams({
+          deploymentId: 'test-deployment',
+          optimized: 'optimizedForSearch',
+          adaptiveResources: false,
+          vCPUUsage: 'low',
+        })
+      ).toEqual({
+        model_id: modelId,
+        deployment_id: 'test-deployment',
+        priority: 'normal',
+        threads_per_allocation: 2,
+        number_of_allocations: 1,
+      });
+
+      expect(
+        mapper.mapUiToApiDeploymentParams({
+          deploymentId: 'test-deployment',
+          optimized: 'optimizedForSearch',
+          adaptiveResources: false,
+          vCPUUsage: 'medium',
+        })
+      ).toEqual({
+        model_id: modelId,
+        deployment_id: 'test-deployment',
+        priority: 'normal',
+        threads_per_allocation: 8,
+        number_of_allocations: 1,
+      });
+
+      expect(
+        mapper.mapUiToApiDeploymentParams({
+          deploymentId: 'test-deployment',
+          optimized: 'optimizedForSearch',
+          adaptiveResources: false,
+          vCPUUsage: 'high',
+        })
+      ).toEqual({
+        model_id: modelId,
+        deployment_id: 'test-deployment',
+        priority: 'normal',
+        threads_per_allocation: 8,
+        number_of_allocations: 1,
+      });
+
+      expect(
+        mapper.mapUiToApiDeploymentParams({
+          deploymentId: 'test-deployment',
+          optimized: 'optimizedForIngest',
+          adaptiveResources: false,
+          vCPUUsage: 'low',
+        })
+      ).toEqual({
+        model_id: modelId,
+        deployment_id: 'test-deployment',
+        priority: 'normal',
+        threads_per_allocation: 1,
+        number_of_allocations: 2,
+      });
+
+      expect(
+        mapper.mapUiToApiDeploymentParams({
+          deploymentId: 'test-deployment',
+          optimized: 'optimizedForIngest',
+          adaptiveResources: false,
+          vCPUUsage: 'medium',
+        })
+      ).toEqual({
+        model_id: modelId,
+        deployment_id: 'test-deployment',
+        priority: 'normal',
+        threads_per_allocation: 1,
+        number_of_allocations: 5,
+      });
+
+      expect(
+        mapper.mapUiToApiDeploymentParams({
+          deploymentId: 'test-deployment',
+          optimized: 'optimizedForIngest',
+          adaptiveResources: false,
+          vCPUUsage: 'high',
+        })
+      ).toEqual({
+        model_id: modelId,
+        deployment_id: 'test-deployment',
+        priority: 'normal',
+        threads_per_allocation: 1,
+        number_of_allocations: 10,
+      });
+    });
+
+    it('should map UI params to API request correctly with adaptive resources', () => {
+      expect(
+        mapper.mapUiToApiDeploymentParams({
+          deploymentId: 'test-deployment',
+          optimized: 'optimizedForSearch',
+          adaptiveResources: true,
+          vCPUUsage: 'low',
+        })
+      ).toEqual({
+        model_id: modelId,
+        deployment_id: 'test-deployment',
+        priority: 'normal',
+        threads_per_allocation: 2,
+        adaptive_allocations: {
+          enabled: true,
+          min_number_of_allocations: 1,
+          max_number_of_allocations: 1,
+        },
+      });
+
+      expect(
+        mapper.mapUiToApiDeploymentParams({
+          deploymentId: 'test-deployment',
+          optimized: 'optimizedForSearch',
+          adaptiveResources: true,
+          vCPUUsage: 'medium',
+        })
+      ).toEqual({
+        model_id: modelId,
+        deployment_id: 'test-deployment',
+        priority: 'normal',
+        threads_per_allocation: 8,
+        adaptive_allocations: {
+          enabled: true,
+          min_number_of_allocations: 1,
+          max_number_of_allocations: 1,
+        },
+      });
+
+      expect(
+        mapper.mapUiToApiDeploymentParams({
+          deploymentId: 'test-deployment',
+          optimized: 'optimizedForSearch',
+          adaptiveResources: true,
+          vCPUUsage: 'high',
+        })
+      ).toEqual({
+        model_id: modelId,
+        deployment_id: 'test-deployment',
+        priority: 'normal',
+        threads_per_allocation: 8,
+        adaptive_allocations: {
+          enabled: true,
+          min_number_of_allocations: 1,
+          max_number_of_allocations: 1,
+        },
+      });
+    });
+
+    describe('mapApiToUiDeploymentParams', () => {
+      it('should map API params to UI correctly', () => {
+        const input = {
+          model_id: modelId,
+          deployment_id: 'test-deployment',
+          priority: 'normal',
+          threads_per_allocation: 8,
+          number_of_allocations: 2,
+        } as unknown as MlTrainedModelAssignmentTaskParametersAdaptive;
+        expect(mapper.mapApiToUiDeploymentParams(input)).toEqual({
+          deploymentId: 'test-deployment',
+          optimized: 'optimizedForSearch',
+          adaptiveResources: false,
+          vCPUUsage: 'high',
+        });
+
+        expect(
+          mapper.mapApiToUiDeploymentParams({
+            model_id: modelId,
+            deployment_id: 'test-deployment',
+            priority: 'normal',
+            threads_per_allocation: 1,
+            number_of_allocations: 1,
+          } as unknown as MlTrainedModelAssignmentTaskParametersAdaptive)
+        ).toEqual({
+          deploymentId: 'test-deployment',
+          optimized: 'optimizedForIngest',
+          adaptiveResources: false,
+          vCPUUsage: 'low',
+        });
+
+        expect(
+          mapper.mapApiToUiDeploymentParams({
+            model_id: modelId,
+            deployment_id: 'test-deployment',
+            priority: 'normal',
+            threads_per_allocation: 2,
+            number_of_allocations: 2,
+          } as unknown as MlTrainedModelAssignmentTaskParametersAdaptive)
```
|
||||
).toEqual({
|
||||
deploymentId: 'test-deployment',
|
||||
optimized: 'optimizedForSearch',
|
||||
adaptiveResources: false,
|
||||
vCPUUsage: 'medium',
|
||||
});
|
||||
});
|
||||
|
||||
it('should map API params to UI correctly with adaptive resources', () => {
|
||||
expect(
|
||||
mapper.mapApiToUiDeploymentParams({
|
||||
model_id: modelId,
|
||||
deployment_id: 'test-deployment',
|
||||
priority: 'normal',
|
||||
threads_per_allocation: 8,
|
||||
adaptive_allocations: {
|
||||
enabled: true,
|
||||
min_number_of_allocations: 2,
|
||||
max_number_of_allocations: 2,
|
||||
},
|
||||
} as unknown as MlTrainedModelAssignmentTaskParametersAdaptive)
|
||||
).toEqual({
|
||||
deploymentId: 'test-deployment',
|
||||
optimized: 'optimizedForSearch',
|
||||
adaptiveResources: true,
|
||||
vCPUUsage: 'high',
|
||||
});
|
||||
|
||||
expect(
|
||||
mapper.mapApiToUiDeploymentParams({
|
||||
model_id: modelId,
|
||||
deployment_id: 'test-deployment',
|
||||
priority: 'normal',
|
||||
threads_per_allocation: 2,
|
||||
adaptive_allocations: {
|
||||
enabled: true,
|
||||
min_number_of_allocations: 2,
|
||||
max_number_of_allocations: 2,
|
||||
},
|
||||
} as unknown as MlTrainedModelAssignmentTaskParametersAdaptive)
|
||||
).toEqual({
|
||||
deploymentId: 'test-deployment',
|
||||
optimized: 'optimizedForSearch',
|
||||
adaptiveResources: true,
|
||||
vCPUUsage: 'medium',
|
||||
});
|
||||
|
||||
expect(
|
||||
mapper.mapApiToUiDeploymentParams({
|
||||
model_id: modelId,
|
||||
deployment_id: 'test-deployment',
|
||||
priority: 'normal',
|
||||
threads_per_allocation: 1,
|
||||
adaptive_allocations: {
|
||||
enabled: true,
|
||||
min_number_of_allocations: 1,
|
||||
max_number_of_allocations: 1,
|
||||
},
|
||||
} as unknown as MlTrainedModelAssignmentTaskParametersAdaptive)
|
||||
).toEqual({
|
||||
deploymentId: 'test-deployment',
|
||||
optimized: 'optimizedForIngest',
|
||||
adaptiveResources: true,
|
||||
vCPUUsage: 'low',
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('when autoscaling is enabled', () => {
|
||||
beforeEach(() => {
|
||||
mapper = new DeploymentParamsMapper(
|
||||
modelId,
|
||||
mlServerLimits,
|
||||
{
|
||||
isMlAutoscalingEnabled: true,
|
||||
} as CloudInfo,
|
||||
true
|
||||
);
|
||||
});
|
||||
|
||||
it('should map UI params to API request correctly', () => {
|
||||
expect(
|
||||
mapper.mapUiToApiDeploymentParams({
|
||||
deploymentId: 'test-deployment',
|
||||
optimized: 'optimizedForSearch',
|
||||
adaptiveResources: false,
|
||||
vCPUUsage: 'low',
|
||||
})
|
||||
).toEqual({
|
||||
model_id: modelId,
|
||||
deployment_id: 'test-deployment',
|
||||
priority: 'normal',
|
||||
threads_per_allocation: 2,
|
||||
number_of_allocations: 1,
|
||||
});
|
||||
|
||||
expect(
|
||||
mapper.mapUiToApiDeploymentParams({
|
||||
deploymentId: 'test-deployment',
|
||||
optimized: 'optimizedForSearch',
|
||||
adaptiveResources: false,
|
||||
vCPUUsage: 'medium',
|
||||
})
|
||||
).toEqual({
|
||||
model_id: modelId,
|
||||
deployment_id: 'test-deployment',
|
||||
priority: 'normal',
|
||||
threads_per_allocation: 8,
|
||||
number_of_allocations: 4,
|
||||
});
|
||||
|
||||
expect(
|
||||
mapper.mapUiToApiDeploymentParams({
|
||||
deploymentId: 'test-deployment',
|
||||
optimized: 'optimizedForSearch',
|
||||
adaptiveResources: false,
|
||||
vCPUUsage: 'high',
|
||||
})
|
||||
).toEqual({
|
||||
model_id: modelId,
|
||||
deployment_id: 'test-deployment',
|
||||
priority: 'normal',
|
||||
threads_per_allocation: 8,
|
||||
number_of_allocations: 12499,
|
||||
});
|
||||
});
|
||||
|
||||
it('should map UI params to API request correctly with adaptive resources', () => {
|
||||
expect(
|
||||
mapper.mapUiToApiDeploymentParams({
|
||||
deploymentId: 'test-deployment',
|
||||
optimized: 'optimizedForSearch',
|
||||
adaptiveResources: true,
|
||||
vCPUUsage: 'low',
|
||||
})
|
||||
).toEqual({
|
||||
model_id: modelId,
|
||||
deployment_id: 'test-deployment',
|
||||
priority: 'normal',
|
||||
threads_per_allocation: 2,
|
||||
adaptive_allocations: {
|
||||
enabled: true,
|
||||
min_number_of_allocations: 1,
|
||||
max_number_of_allocations: 1,
|
||||
},
|
||||
});
|
||||
|
||||
expect(
|
||||
mapper.mapUiToApiDeploymentParams({
|
||||
deploymentId: 'test-deployment',
|
||||
optimized: 'optimizedForSearch',
|
||||
adaptiveResources: true,
|
||||
vCPUUsage: 'medium',
|
||||
})
|
||||
).toEqual({
|
||||
model_id: modelId,
|
||||
deployment_id: 'test-deployment',
|
||||
priority: 'normal',
|
||||
threads_per_allocation: 8,
|
||||
adaptive_allocations: {
|
||||
enabled: true,
|
||||
min_number_of_allocations: 1,
|
||||
max_number_of_allocations: 4,
|
||||
},
|
||||
});
|
||||
|
||||
expect(
|
||||
mapper.mapUiToApiDeploymentParams({
|
||||
deploymentId: 'test-deployment',
|
||||
optimized: 'optimizedForSearch',
|
||||
adaptiveResources: true,
|
||||
vCPUUsage: 'high',
|
||||
})
|
||||
).toEqual({
|
||||
model_id: modelId,
|
||||
deployment_id: 'test-deployment',
|
||||
priority: 'normal',
|
||||
threads_per_allocation: 8,
|
||||
adaptive_allocations: {
|
||||
enabled: true,
|
||||
max_number_of_allocations: 12499,
|
||||
min_number_of_allocations: 4,
|
||||
},
|
||||
});
|
||||
|
||||
expect(
|
||||
mapper.mapUiToApiDeploymentParams({
|
||||
deploymentId: 'test-deployment',
|
||||
optimized: 'optimizedForIngest',
|
||||
adaptiveResources: true,
|
||||
vCPUUsage: 'low',
|
||||
})
|
||||
).toEqual({
|
||||
model_id: modelId,
|
||||
deployment_id: 'test-deployment',
|
||||
priority: 'normal',
|
||||
threads_per_allocation: 1,
|
||||
adaptive_allocations: {
|
||||
enabled: true,
|
||||
min_number_of_allocations: 1,
|
||||
max_number_of_allocations: 2,
|
||||
},
|
||||
});
|
||||
|
||||
expect(
|
||||
mapper.mapUiToApiDeploymentParams({
|
||||
deploymentId: 'test-deployment',
|
||||
optimized: 'optimizedForIngest',
|
||||
adaptiveResources: true,
|
||||
vCPUUsage: 'medium',
|
||||
})
|
||||
).toEqual({
|
||||
model_id: modelId,
|
||||
deployment_id: 'test-deployment',
|
||||
priority: 'normal',
|
||||
threads_per_allocation: 1,
|
||||
adaptive_allocations: {
|
||||
enabled: true,
|
||||
min_number_of_allocations: 3,
|
||||
max_number_of_allocations: 32,
|
||||
},
|
||||
});
|
||||
|
||||
expect(
|
||||
mapper.mapUiToApiDeploymentParams({
|
||||
deploymentId: 'test-deployment',
|
||||
optimized: 'optimizedForIngest',
|
||||
adaptiveResources: true,
|
||||
vCPUUsage: 'high',
|
||||
})
|
||||
).toEqual({
|
||||
model_id: modelId,
|
||||
deployment_id: 'test-deployment',
|
||||
priority: 'normal',
|
||||
threads_per_allocation: 1,
|
||||
adaptive_allocations: {
|
||||
enabled: true,
|
||||
min_number_of_allocations: 33,
|
||||
max_number_of_allocations: 99999,
|
||||
},
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
|
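The allocation counts asserted in the autoscaling tests above all follow one rule: divide the vCPU breakpoint bounds by the threads per allocation and floor, with a minimum of 1. A standalone sketch of that arithmetic (the breakpoint values come from the diff below; the helper name is illustrative, not the actual implementation):

```typescript
// Recompute the allocation counts asserted in the autoscaling tests.
// Autoscaling breakpoints from the diff: low 1-2, medium 3-32, high 33-99999 vCPUs.
const breakpoints = {
  low: { min: 1, max: 2 },
  medium: { min: 3, max: 32 },
  high: { min: 33, max: 99999 },
};

function allocations(level: keyof typeof breakpoints, threadsPerAllocation: number) {
  const bp = breakpoints[level];
  // floor the bounds by threads per allocation; `|| 1` keeps at least one allocation
  const max = Math.floor(bp.max / threadsPerAllocation) || 1;
  const min = Math.floor(bp.min / threadsPerAllocation) || 1;
  return { min, max };
}

// Search deployments use 8 threads per allocation at medium/high usage:
console.log(allocations('medium', 8)); // { min: 1, max: 4 }  (floor(3/8)=0 -> 1)
console.log(allocations('high', 8)); // { min: 4, max: 12499 }
```

These match the `min_number_of_allocations`/`max_number_of_allocations` values the tests expect for search deployments under autoscaling.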
@@ -0,0 +1,230 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import type { MlStartTrainedModelDeploymentRequest } from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import type { TrainedModelDeploymentStatsResponse } from '../../../common/types/trained_models';
import type { CloudInfo } from '../services/ml_server_info';
import type { MlServerLimits } from '../../../common/types/ml_server_info';
import type { AdaptiveAllocations } from '../../../server/lib/ml_client/types';
import type { DeploymentParamsUI } from './deployment_setup';

export type MlStartTrainedModelDeploymentRequestNew = MlStartTrainedModelDeploymentRequest &
  AdaptiveAllocations;

const THREADS_MAX_EXPONENT = 5;

// TODO set to 0 when https://github.com/elastic/elasticsearch/pull/113455 is merged
const MIN_SUPPORTED_NUMBER_OF_ALLOCATIONS = 1;

type VCPUBreakpoints = Record<
  DeploymentParamsUI['vCPUUsage'],
  {
    min: number;
    max: number;
    /** Static value is used for the number of vCPUs when the adaptive resources are disabled */
    static: number;
  }
>;

type BreakpointValues = VCPUBreakpoints[keyof VCPUBreakpoints];

/**
 * Class responsible for mapping deployment params between API and UI
 */
export class DeploymentParamsMapper {
  private readonly threadingParamsValues: number[];

  /**
   * vCPUs level breakpoints for cloud cluster with enabled ML autoscaling
   */
  private readonly autoscalingVCPUBreakpoints: VCPUBreakpoints = {
    low: { min: MIN_SUPPORTED_NUMBER_OF_ALLOCATIONS, max: 2, static: 2 },
    medium: { min: 3, max: 32, static: 32 },
    high: { min: 33, max: 99999, static: 100 },
  };

  /**
   * vCPUs level breakpoints for serverless projects
   */
  private readonly serverlessVCPUBreakpoints: VCPUBreakpoints = {
    low: { min: MIN_SUPPORTED_NUMBER_OF_ALLOCATIONS, max: 2, static: 2 },
    medium: { min: 3, max: 32, static: 32 },
    high: { min: 33, max: 500, static: 100 },
  };

  /**
   * vCPUs level breakpoints based on the ML server limits.
   * Either on-prem or cloud with disabled ML autoscaling.
   */
  private readonly hardwareVCPUBreakpoints: VCPUBreakpoints;

  /**
   * Resulting vCPUs level breakpoints based on the cluster env.
   */
  private readonly vCpuBreakpoints: VCPUBreakpoints;

  constructor(
    private readonly modelId: string,
    private readonly mlServerLimits: MlServerLimits,
    private readonly cloudInfo: CloudInfo,
    private readonly showNodeInfo: boolean
  ) {
    const maxSingleMlNodeProcessors = this.mlServerLimits.max_single_ml_node_processors;

    this.threadingParamsValues = new Array(THREADS_MAX_EXPONENT)
      .fill(null)
      .map((v, i) => Math.pow(2, i))
      .filter(maxSingleMlNodeProcessors ? (v) => v <= maxSingleMlNodeProcessors : (v) => true);

    const mediumValue = this.mlServerLimits!.total_ml_processors! / 2;

    this.hardwareVCPUBreakpoints = {
      low: { min: MIN_SUPPORTED_NUMBER_OF_ALLOCATIONS, max: 2, static: 2 },
      medium: { min: Math.min(3, mediumValue), max: mediumValue, static: mediumValue },
      high: {
        min: mediumValue + 1,
        max: this.mlServerLimits!.total_ml_processors!,
        static: this.mlServerLimits!.total_ml_processors!,
      },
    };

    if (!this.showNodeInfo) {
      this.vCpuBreakpoints = this.serverlessVCPUBreakpoints;
    } else if (this.cloudInfo.isMlAutoscalingEnabled) {
      this.vCpuBreakpoints = this.autoscalingVCPUBreakpoints;
    } else {
      this.vCpuBreakpoints = this.hardwareVCPUBreakpoints;
    }
  }

  private getNumberOfThreads(input: DeploymentParamsUI): number {
    // 1 thread for ingest at all times
    if (input.optimized === 'optimizedForIngest') return 1;
    // for search deployments with low vCPUs level set 2, otherwise max available
    return input.vCPUUsage === 'low' ? 2 : Math.max(...this.threadingParamsValues);
  }

  private getAllocationsParams(
    params: DeploymentParamsUI
  ): Pick<MlStartTrainedModelDeploymentRequestNew, 'number_of_allocations'> &
    Pick<
      Exclude<MlStartTrainedModelDeploymentRequestNew['adaptive_allocations'], undefined>,
      'min_number_of_allocations' | 'max_number_of_allocations'
    > {
    const threadsPerAllocation = this.getNumberOfThreads(params);

    const levelValues = this.vCpuBreakpoints[params.vCPUUsage];

    const maxValue = Math.floor(levelValues.max / threadsPerAllocation) || 1;

    return {
      number_of_allocations: maxValue,
      min_number_of_allocations:
        Math.floor(levelValues.min / threadsPerAllocation) ||
        // in any env, allow scale down to 0 only for "low" vCPU usage
        (params.vCPUUsage === 'low' ? MIN_SUPPORTED_NUMBER_OF_ALLOCATIONS : 1),
      max_number_of_allocations: maxValue,
    };
  }

  /**
   * Gets the vCPU (virtual CPU) range based on the vCPU usage level
   * @param vCPUUsage
   * @returns
   */
  public getVCPURange(vCPUUsage: DeploymentParamsUI['vCPUUsage']) {
    return this.vCpuBreakpoints[vCPUUsage];
  }

  /**
   * Gets the VCU (Virtual Compute Unit) range based on the vCPU usage level
   * @param vCPUUsage
   * @returns
   */
  public getVCURange(vCPUUsage: DeploymentParamsUI['vCPUUsage']) {
    // general purpose (c6gd) 1 VCU = 1GB RAM / 0.5 vCPU
    // vector optimized (r6gd) 1 VCU = 1GB RAM / 0.125 vCPU
    const vCPUBreakpoints = this.serverlessVCPUBreakpoints[vCPUUsage];

    return Object.entries(vCPUBreakpoints).reduce((acc, [key, val]) => {
      // as we can't retrieve the Search project configuration, we assume that the vector-optimized instance is used
      acc[key as keyof BreakpointValues] = Math.round(val / 0.125);
      return acc;
    }, {} as BreakpointValues);
  }

  /**
   * Maps UI params to the actual start deployment API request
   * @param input
   */
  public mapUiToApiDeploymentParams(
    input: DeploymentParamsUI
  ): MlStartTrainedModelDeploymentRequestNew {
    const resultInput: DeploymentParamsUI = Object.create(input);
    if (!this.showNodeInfo) {
      // Enforce adaptive resources for serverless
      resultInput.adaptiveResources = true;
    }

    const allocationParams = this.getAllocationsParams(resultInput);

    return {
      model_id: this.modelId,
      deployment_id: resultInput.deploymentId,
      priority: 'normal',
      threads_per_allocation: this.getNumberOfThreads(resultInput),
      ...(resultInput.adaptiveResources || !this.showNodeInfo
        ? {
            adaptive_allocations: {
              enabled: true,
              min_number_of_allocations: allocationParams.min_number_of_allocations,
              max_number_of_allocations: allocationParams.max_number_of_allocations,
            },
          }
        : {
            number_of_allocations: allocationParams.number_of_allocations,
          }),
    };
  }

  /**
   * Maps deployment params from the API to the UI
   * @param input
   */
  public mapApiToUiDeploymentParams(
    input: MlTrainedModelAssignmentTaskParametersAdaptive
  ): DeploymentParamsUI {
    let optimized: DeploymentParamsUI['optimized'] = 'optimizedForIngest';
    if (input.threads_per_allocation > 1) {
      optimized = 'optimizedForSearch';
    }
    const adaptiveResources = !!input.adaptive_allocations?.enabled;

    const vCPUs =
      input.threads_per_allocation *
      (adaptiveResources
        ? input.adaptive_allocations!.max_number_of_allocations!
        : input.number_of_allocations);

    const [vCPUUsage] = Object.entries(this.vCpuBreakpoints)
      .reverse()
      .find(([key, val]) => vCPUs >= val.min) as [
      DeploymentParamsUI['vCPUUsage'],
      { min: number; max: number }
    ];

    return {
      deploymentId: input.deployment_id,
      optimized,
      adaptiveResources,
      vCPUUsage,
    };
  }
}

export type MlTrainedModelAssignmentTaskParametersAdaptive = TrainedModelDeploymentStatsResponse &
  AdaptiveAllocations;
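The `getVCURange` method above converts each vCPU breakpoint to VCUs by dividing by 0.125, the vector-optimized (r6gd) ratio named in its comment. A minimal sketch of that conversion (the `Breakpoint` shape here is a simplified stand-in for the real type):

```typescript
// Sketch of the vCPU -> VCU conversion used by getVCURange.
// Assumes the vector-optimized (r6gd) ratio from the source comment:
// 1 VCU = 0.125 vCPU, so VCUs = vCPUs / 0.125.
type Breakpoint = { min: number; max: number; static: number };

function toVCUs(bp: Breakpoint): Breakpoint {
  return {
    min: Math.round(bp.min / 0.125),
    max: Math.round(bp.max / 0.125),
    static: Math.round(bp.static / 0.125),
  };
}

// The serverless "low" breakpoint from the diff (1-2 vCPUs, static 2):
console.log(toVCUs({ min: 1, max: 2, static: 2 })); // { min: 8, max: 16, static: 16 }
```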
File diff suppressed because it is too large
@@ -20,6 +20,7 @@ import {
   getAnalysisType,
   type DataFrameAnalysisConfigType,
 } from '@kbn/ml-data-frame-analytics-utils';
+import { useEnabledFeatures } from '../contexts/ml';
 import { useTrainedModelsApiService } from '../services/ml_api_service/trained_models';
 import { getUserConfirmationProvider } from './force_stop_dialog';
 import { useToastNotificationService } from '../services/toast_notification_service';
@@ -29,6 +30,7 @@ import { ML_PAGES } from '../../../common/constants/locator';
 import { isTestable, isDfaTrainedModel } from './test_models';
 import type { ModelItem } from './models_list';
 import { usePermissionCheck } from '../capabilities/check_capabilities';
+import { useCloudCheck } from '../components/node_available_warning/hooks';
 
 export function useModelActions({
   onDfaTestAction,
@@ -61,6 +63,10 @@ export function useModelActions({
     },
   } = useMlKibana();
 
+  const { showNodeInfo } = useEnabledFeatures();
+
+  const cloudInfo = useCloudCheck();
+
   const [
     canCreateTrainedModels,
     canStartStopTrainedModels,
@@ -114,9 +120,11 @@
       getUserInputModelDeploymentParamsProvider(
         overlays,
         startServices,
-        startModelDeploymentDocUrl
+        startModelDeploymentDocUrl,
+        cloudInfo,
+        showNodeInfo
       ),
-    [overlays, startServices, startModelDeploymentDocUrl]
+    [overlays, startServices, startModelDeploymentDocUrl, cloudInfo, showNodeInfo]
   );
 
   const isBuiltInModel = useCallback(
@@ -212,14 +220,20 @@
 
         try {
           onLoading(true);
-          await trainedModelsApiService.startModelAllocation(item.model_id, {
-            number_of_allocations: modelDeploymentParams.numOfAllocations,
-            threads_per_allocation: modelDeploymentParams.threadsPerAllocations!,
-            priority: modelDeploymentParams.priority!,
-            deployment_id: !!modelDeploymentParams.deploymentId
-              ? modelDeploymentParams.deploymentId
-              : item.model_id,
-          });
+          await trainedModelsApiService.startModelAllocation(
+            item.model_id,
+            {
+              priority: modelDeploymentParams.priority!,
+              threads_per_allocation: modelDeploymentParams.threads_per_allocation!,
+              number_of_allocations: modelDeploymentParams.number_of_allocations,
+              deployment_id: modelDeploymentParams.deployment_id,
+            },
+            {
+              ...(modelDeploymentParams.adaptive_allocations?.enabled
+                ? { adaptive_allocations: modelDeploymentParams.adaptive_allocations }
+                : {}),
+            }
+          );
           displaySuccessToast(
             i18n.translate('xpack.ml.trainedModels.modelsList.startSuccess', {
               defaultMessage: 'Deployment for "{modelId}" has been started successfully.',
@@ -263,24 +277,29 @@
         !isLoading &&
         !!item.stats?.deployment_stats?.some((v) => v.state === DEPLOYMENT_STATE.STARTED),
       onClick: async (item) => {
-        const deploymentToUpdate = item.deployment_ids[0];
+        const deploymentIdToUpdate = item.deployment_ids[0];
 
-        const deploymentParams = await getUserInputModelDeploymentParams(item, {
-          deploymentId: deploymentToUpdate,
-          numOfAllocations: item.stats!.deployment_stats.find(
-            (v) => v.deployment_id === deploymentToUpdate
-          )!.number_of_allocations,
-        });
+        const targetDeployment = item.stats!.deployment_stats.find(
+          (v) => v.deployment_id === deploymentIdToUpdate
+        )!;
+
+        const deploymentParams = await getUserInputModelDeploymentParams(item, targetDeployment);
 
         if (!deploymentParams) return;
 
         try {
           onLoading(true);
 
           await trainedModelsApiService.updateModelDeployment(
             item.model_id,
-            deploymentParams.deploymentId!,
+            deploymentParams.deployment_id!,
             {
-              number_of_allocations: deploymentParams.numOfAllocations,
+              ...(deploymentParams.adaptive_allocations
+                ? { adaptive_allocations: deploymentParams.adaptive_allocations }
+                : {
+                    number_of_allocations: deploymentParams.number_of_allocations!,
+                    adaptive_allocations: { enabled: false },
+                  }),
             }
           );
           displaySuccessToast(
@@ -870,7 +870,8 @@ export const ModelsList: FC<Props> = ({
     if (pageState.showAll) {
       return items;
     } else {
-      return items.filter((item) => item.supported !== false);
+      // by default show only deployed models or recommended for download
+      return items.filter((item) => item.create_time || item.recommended);
     }
   }, [items, pageState.showAll]);
@@ -896,6 +897,7 @@
               }
               checked={!!pageState.showAll}
               onChange={(e) => updatePageState({ showAll: e.target.checked })}
+              data-test-subj="mlModelsShowAllSwitch"
             />
           </EuiFlexItem>
         </EuiFlexGroup>
@@ -69,6 +69,8 @@ export interface MlInfoResponse {
   upgrade_mode: boolean;
   cloudId?: string;
   isCloudTrial?: boolean;
+  cloudUrl?: string;
+  isMlAutoscalingEnabled: boolean;
 }
 
 export interface BucketSpanEstimatorResponse {
@@ -56,6 +56,26 @@ export interface InferenceStatsResponse {
   trained_model_stats: TrainedModelStat[];
 }
 
+// eslint-disable-next-line @typescript-eslint/consistent-type-definitions
+export type CommonDeploymentParams = {
+  deployment_id?: string;
+  threads_per_allocation: number;
+  priority: 'low' | 'normal';
+  number_of_allocations?: number;
+};
+
+export interface AdaptiveAllocationsParams {
+  adaptive_allocations?: {
+    enabled: boolean;
+    min_number_of_allocations?: number;
+    max_number_of_allocations?: number;
+  };
+}
+
+export interface UpdateAllocationParams extends AdaptiveAllocationsParams {
+  number_of_allocations?: number;
+}
+
 /**
  * Service with APIs calls to perform operations with trained models.
  * @param httpService
@@ -199,17 +219,14 @@ export function trainedModelsApiProvider(httpService: HttpService) {
 
     startModelAllocation(
       modelId: string,
-      queryParams?: {
-        number_of_allocations: number;
-        threads_per_allocation: number;
-        priority: 'low' | 'normal';
-        deployment_id?: string;
-      }
+      queryParams?: CommonDeploymentParams,
+      bodyParams?: AdaptiveAllocationsParams
     ) {
       return httpService.http<{ acknowledge: boolean }>({
         path: `${ML_INTERNAL_BASE_PATH}/trained_models/${modelId}/deployment/_start`,
         method: 'POST',
         query: queryParams,
+        ...(bodyParams ? { body: JSON.stringify(bodyParams) } : {}),
         version: '1',
       });
     },
@@ -231,11 +248,7 @@ export function trainedModelsApiProvider(httpService: HttpService) {
       });
     },
 
-    updateModelDeployment(
-      modelId: string,
-      deploymentId: string,
-      params: { number_of_allocations: number }
-    ) {
+    updateModelDeployment(modelId: string, deploymentId: string, params: UpdateAllocationParams) {
       return httpService.http<{ acknowledge: boolean }>({
         path: `${ML_INTERNAL_BASE_PATH}/trained_models/${modelId}/${deploymentId}/deployment/_update`,
         method: 'POST',
@@ -13,6 +13,8 @@ export interface CloudInfo {
   isCloud: boolean;
   isCloudTrial: boolean;
   deploymentId: string | null;
+  cloudUrl: string | null;
+  isMlAutoscalingEnabled: boolean;
 }
 
 let defaults: MlServerDefaults = {
@@ -26,6 +28,8 @@ const cloudInfo: CloudInfo = {
   isCloud: false,
   isCloudTrial: false,
   deploymentId: null,
+  cloudUrl: null,
+  isMlAutoscalingEnabled: false,
 };
 
 export async function loadMlServerInfo(mlApi: MlApi) {
@@ -8,7 +8,7 @@
 import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
 import type { IScopedClusterClient } from '@kbn/core/server';
 import type { DataFrameAnalyticsConfig } from '@kbn/ml-data-frame-analytics-utils';
-
+import { isPopulatedObject } from '@kbn/ml-is-populated-object';
 import type { MLSavedObjectService } from '../../saved_objects';
 import { getJobDetailsFromTrainedModel } from '../../saved_objects/util';
 import type { JobType } from '../../../common/types/saved_objects';
@@ -500,16 +500,29 @@ export function getMlClient(
     },
     async startTrainedModelDeployment(...p: Parameters<MlClient['startTrainedModelDeployment']>) {
       await modelIdsCheck(p);
-      return mlClient.startTrainedModelDeployment(...p);
+      // TODO use mlClient.startTrainedModelDeployment when esClient is updated
+      const { model_id: modelId, adaptive_allocations: adaptiveAllocations, ...queryParams } = p[0];
+      return client.asInternalUser.transport.request<estypes.MlStartTrainedModelDeploymentResponse>(
+        {
+          method: 'POST',
+          path: `_ml/trained_models/${modelId}/deployment/_start`,
+          ...(isPopulatedObject(queryParams) ? { querystring: queryParams } : {}),
+          ...(isPopulatedObject(adaptiveAllocations)
+            ? { body: { adaptive_allocations: adaptiveAllocations } }
+            : {}),
+        },
+        p[1]
+      );
     },
     async updateTrainedModelDeployment(...p: Parameters<MlClient['updateTrainedModelDeployment']>) {
       await modelIdsCheck(p);
 
-      const { deployment_id: deploymentId, number_of_allocations: numberOfAllocations } = p[0];
+      const { deployment_id: deploymentId, model_id: modelId, ...bodyParams } = p[0];
+      // TODO use mlClient.updateTrainedModelDeployment when esClient is updated
       return client.asInternalUser.transport.request({
         method: 'POST',
         path: `/_ml/trained_models/${deploymentId}/deployment/_update`,
-        body: { number_of_allocations: numberOfAllocations },
+        body: bodyParams,
       });
     },
     async stopTrainedModelDeployment(...p: Parameters<MlClient['stopTrainedModelDeployment']>) {
@@ -5,16 +5,28 @@
  * 2.0.
  */
 
-import type { TransportRequestOptionsWithMeta } from '@elastic/elasticsearch';
+import type {
+  TransportRequestOptionsWithMeta,
+  TransportRequestOptions,
+} from '@elastic/elasticsearch';
 import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
 import type { ElasticsearchClient } from '@kbn/core/server';
 import type { searchProvider } from './search';
 
 type OrigMlClient = ElasticsearchClient['ml'];
-export interface UpdateTrainedModelDeploymentRequest {
+
+export interface AdaptiveAllocations {
+  adaptive_allocations?: {
+    enabled: boolean;
+    min_number_of_allocations?: number;
+    max_number_of_allocations?: number;
+  };
+}
+
+export interface UpdateTrainedModelDeploymentRequest extends AdaptiveAllocations {
   model_id: string;
   deployment_id?: string;
-  number_of_allocations: number;
+  number_of_allocations?: number;
 }
 export interface UpdateTrainedModelDeploymentResponse {
   acknowledge: boolean;
@@ -36,6 +48,10 @@ export interface MlClient
   updateTrainedModelDeployment: (
     payload: UpdateTrainedModelDeploymentRequest
   ) => Promise<UpdateTrainedModelDeploymentResponse>;
+  startTrainedModelDeployment: (
+    payload: estypes.MlStartTrainedModelDeploymentRequest & AdaptiveAllocations,
+    options?: TransportRequestOptions
+  ) => Promise<estypes.MlStartTrainedModelDeploymentResponse>;
   stopTrainedModelDeployment: (
     p: MlStopTrainedModelDeploymentRequest,
     options?: TransportRequestOptionsWithMeta
@@ -24,7 +24,7 @@ export const createInferenceSchema = schema.object({
   inferenceId: schema.string(),
 });
 
-export const threadingParamsSchema = schema.maybe(
+export const threadingParamsQuerySchema = schema.maybe(
   schema.object({
     number_of_allocations: schema.maybe(schema.number()),
     threads_per_allocation: schema.maybe(schema.number()),
@@ -33,8 +33,27 @@
   })
 );
 
+export const threadingParamsBodySchema = schema.nullable(
+  schema.object({
+    adaptive_allocations: schema.maybe(
+      schema.object({
+        enabled: schema.boolean(),
+        min_number_of_allocations: schema.maybe(schema.number()),
+        max_number_of_allocations: schema.maybe(schema.number()),
+      })
+    ),
+  })
+);
+
 export const updateDeploymentParamsSchema = schema.object({
-  number_of_allocations: schema.number(),
+  number_of_allocations: schema.maybe(schema.number()),
+  adaptive_allocations: schema.maybe(
+    schema.object({
+      enabled: schema.boolean(),
+      min_number_of_allocations: schema.maybe(schema.number()),
+      max_number_of_allocations: schema.maybe(schema.number()),
+    })
+  ),
 });
 
 export const optionalModelIdSchema = schema.object({
@@ -166,14 +166,28 @@ export function systemRoutes(
       version: '1',
       validate: false,
     },
-    routeGuard.basicLicenseAPIGuard(async ({ mlClient, response }) => {
+    routeGuard.basicLicenseAPIGuard(async ({ mlClient, response, client }) => {
       try {
         const body = await mlClient.info();
         const cloudId = cloud?.cloudId;
         const isCloudTrial = cloud?.trialEndDate && Date.now() < cloud.trialEndDate.getTime();
 
+        let isMlAutoscalingEnabled = false;
+        try {
+          await client.asInternalUser.autoscaling.getAutoscalingPolicy({ name: 'ml' });
+          isMlAutoscalingEnabled = true;
+        } catch (e) {
+          // If the policy doesn't exist, keep it false.
+        }
+
         return response.ok({
-          body: { ...body, cloudId, isCloudTrial },
+          body: {
+            ...body,
+            cloudId,
+            isCloudTrial,
+            cloudUrl: cloud.baseUrl,
+            isMlAutoscalingEnabled,
+          },
         });
       } catch (error) {
         return response.customError(wrapError(error));
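The route above determines `isMlAutoscalingEnabled` by probing for the `ml` autoscaling policy and treating any failure as "disabled". A standalone sketch of that probe-and-default pattern — `getPolicy` here is a stand-in for `client.asInternalUser.autoscaling.getAutoscalingPolicy({ name: 'ml' })`, not the real client call:

```typescript
// Probe for a resource; resolve to true if it exists, false on any error
// (a missing autoscaling policy surfaces as a 404 error from the client).
async function detectFeature(getPolicy: () => Promise<unknown>): Promise<boolean> {
  try {
    await getPolicy(); // resolves -> the policy exists
    return true;
  } catch {
    return false; // 404 (or any failure) -> keep the default of false
  }
}

void (async () => {
  console.log(await detectFeature(async () => ({ policy: 'ml' }))); // policy found
  console.log(await detectFeature(async () => { throw new Error('404'); })); // not found
})();
```

Swallowing every error (not just 404) is a deliberately coarse default here: the flag only gates UI behavior, so failing closed is the safe choice.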
@@ -21,21 +21,22 @@ import { type MlFeatures, ML_INTERNAL_BASE_PATH } from '../../common/constants/a
 import type { RouteInitialization } from '../types';
 import { wrapError } from '../client/error_wrapper';
 import {
+  createIngestPipelineSchema,
+  curatedModelsParamsSchema,
+  curatedModelsQuerySchema,
   deleteTrainedModelQuerySchema,
   getInferenceQuerySchema,
   inferTrainedModelBody,
   inferTrainedModelQuery,
   modelAndDeploymentIdSchema,
+  modelDownloadsQuery,
   modelIdSchema,
   optionalModelIdSchema,
   pipelineSimulateBody,
   putTrainedModelQuerySchema,
-  threadingParamsSchema,
+  threadingParamsBodySchema,
+  threadingParamsQuerySchema,
   updateDeploymentParamsSchema,
-  createIngestPipelineSchema,
-  modelDownloadsQuery,
-  curatedModelsParamsSchema,
-  curatedModelsQuerySchema,
 } from './schemas/inference_schema';
 import type { PipelineDefinition } from '../../common/types/trained_models';
 import { type TrainedModelConfigResponse } from '../../common/types/trained_models';
@@ -534,22 +535,27 @@ export function trainedModelsRoutes(
       validate: {
         request: {
           params: modelIdSchema,
-          query: threadingParamsSchema,
+          query: threadingParamsQuerySchema,
+          body: threadingParamsBodySchema,
         },
       },
     },
     routeGuard.fullLicenseAPIGuard(async ({ mlClient, request, response }) => {
       try {
         const { modelId } = request.params;
 
         // TODO use mlClient.startTrainedModelDeployment when esClient is updated
         const body = await mlClient.startTrainedModelDeployment(
           {
             model_id: modelId,
             ...(request.query ? request.query : {}),
-          }
+            ...(request.body ? request.body : {}),
+          },
+          {
+            maxRetries: 0,
+          }
         );
 
         return response.ok({
           body,
         });

@@ -584,6 +590,7 @@ export function trainedModelsRoutes(
           deployment_id: deploymentId,
+          ...request.body,
         });
 
         return response.ok({
           body,
         });
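The start-deployment handler above merges the optional query params (`number_of_allocations`, `threads_per_allocation`) and the optional body (`adaptive_allocations`) into a single Elasticsearch request. A standalone sketch of that merge — `buildStartRequest` and the model id are illustrative helpers, not Kibana code:

```typescript
// Simplified view of the merged startTrainedModelDeployment request.
interface StartDeploymentRequest {
  model_id: string;
  number_of_allocations?: number;
  threads_per_allocation?: number;
  adaptive_allocations?: {
    enabled: boolean;
    min_number_of_allocations?: number;
    max_number_of_allocations?: number;
  };
}

function buildStartRequest(
  modelId: string,
  query?: Pick<StartDeploymentRequest, 'number_of_allocations' | 'threads_per_allocation'>,
  body?: Pick<StartDeploymentRequest, 'adaptive_allocations'> | null
): StartDeploymentRequest {
  // Mirrors the route handler: spread query and body only when present
  // (body may be null because the schema is nullable).
  return {
    model_id: modelId,
    ...(query ? query : {}),
    ...(body ? body : {}),
  };
}

// Example: "optimised for ingest" uses 1 thread per allocation; the adaptive
// range here is an example value, not a product default.
const req = buildStartRequest(
  'my_model',
  { threads_per_allocation: 1 },
  {
    adaptive_allocations: {
      enabled: true,
      min_number_of_allocations: 0,
      max_number_of_allocations: 2,
    },
  }
);
console.log(req);
```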
@@ -30175,24 +30175,10 @@
    "xpack.ml.trainedModels.modelsList.selectedModelsMessage": "{modelsCount, plural, one{# modèle sélectionné} other {# modèles sélectionnés}}",
    "xpack.ml.trainedModels.modelsList.startDeployment.cancelButton": "Annuler",
    "xpack.ml.trainedModels.modelsList.startDeployment.deploymentIdError": "Un déploiement avec cet ID existe déjà.",
    "xpack.ml.trainedModels.modelsList.startDeployment.deploymentIdHelp": "Spécifiez l'identificateur unique pour le déploiement de modèle.",
    "xpack.ml.trainedModels.modelsList.startDeployment.deploymentIdLabel": "ID de déploiement",
    "xpack.ml.trainedModels.modelsList.startDeployment.docLinkTitle": "En savoir plus",
    "xpack.ml.trainedModels.modelsList.startDeployment.lowPriorityLabel": "bas",
    "xpack.ml.trainedModels.modelsList.startDeployment.maxNumOfProcessorsWarning": "Le produit du nombre d'allocations et de threads par allocation doit être inférieur au nombre total de processeurs sur vos nœuds ML.",
    "xpack.ml.trainedModels.modelsList.startDeployment.modalTitle": "Commencer le déploiement de {modelId}",
    "xpack.ml.trainedModels.modelsList.startDeployment.normalPriorityLabel": "normal",
    "xpack.ml.trainedModels.modelsList.startDeployment.numbersOfAllocationsHelp": "Augmenter pour améliorer le rendement d'ingestion de documents.",
    "xpack.ml.trainedModels.modelsList.startDeployment.numbersOfAllocationsLabel": "Nombre d’allocations",
    "xpack.ml.trainedModels.modelsList.startDeployment.numbersOfAllocationsMaxError": "Ne peut pas dépasser {max} - le nombre total de processeurs ML.",
    "xpack.ml.trainedModels.modelsList.startDeployment.numbersOfAllocationsMinError": "Au moins une allocation est requise.",
    "xpack.ml.trainedModels.modelsList.startDeployment.priorityHelp": "Sélectionnez une priorité faible pour les démonstrations où chaque modèle sera très peu utilisé.",
    "xpack.ml.trainedModels.modelsList.startDeployment.priorityLabel": "Priorité",
    "xpack.ml.trainedModels.modelsList.startDeployment.priorityLegend": "Sélecteur de priorité",
    "xpack.ml.trainedModels.modelsList.startDeployment.startButton": "Début",
    "xpack.ml.trainedModels.modelsList.startDeployment.threadsPerAllocationHelp": "Augmenter pour améliorer la latence de l'inférence.",
    "xpack.ml.trainedModels.modelsList.startDeployment.threadsPerAllocationLabel": "Threads par allocation",
    "xpack.ml.trainedModels.modelsList.startDeployment.threadsPerAllocationLegend": "Sélecteur de threads par allocation",
    "xpack.ml.trainedModels.modelsList.startDeployment.updateButton": "Mettre à jour",
    "xpack.ml.trainedModels.modelsList.startDeployment.viewElserDocLink": "Afficher la documentation",
    "xpack.ml.trainedModels.modelsList.startFailed": "Impossible de démarrer \"{modelId}\"",
@@ -29922,24 +29922,10 @@
    "xpack.ml.trainedModels.modelsList.selectedModelsMessage": "{modelsCount, plural, other {#個のモデル}}が選択されました",
    "xpack.ml.trainedModels.modelsList.startDeployment.cancelButton": "キャンセル",
    "xpack.ml.trainedModels.modelsList.startDeployment.deploymentIdError": "このIDのデプロイはすでに存在します。",
    "xpack.ml.trainedModels.modelsList.startDeployment.deploymentIdHelp": "モデルデプロイの一意のIDを指定します。",
    "xpack.ml.trainedModels.modelsList.startDeployment.deploymentIdLabel": "デプロイID",
    "xpack.ml.trainedModels.modelsList.startDeployment.docLinkTitle": "詳細",
    "xpack.ml.trainedModels.modelsList.startDeployment.lowPriorityLabel": "低",
    "xpack.ml.trainedModels.modelsList.startDeployment.maxNumOfProcessorsWarning": "割り当て数と割り当てごとのスレッドの積は、MLノードのプロセッサーの合計数未満でなければなりません。",
    "xpack.ml.trainedModels.modelsList.startDeployment.modalTitle": "{modelId}デプロイを開始",
    "xpack.ml.trainedModels.modelsList.startDeployment.normalPriorityLabel": "標準",
    "xpack.ml.trainedModels.modelsList.startDeployment.numbersOfAllocationsHelp": "大きくすると、ドキュメントのインジェスト処理能力が向上します。",
    "xpack.ml.trainedModels.modelsList.startDeployment.numbersOfAllocationsLabel": "割り当て数",
    "xpack.ml.trainedModels.modelsList.startDeployment.numbersOfAllocationsMaxError": "MLプロセッサーの合計数の{max}を超えることはできません。",
    "xpack.ml.trainedModels.modelsList.startDeployment.numbersOfAllocationsMinError": "1つ以上の割り当てが必要です。",
    "xpack.ml.trainedModels.modelsList.startDeployment.priorityHelp": "各モデルが非常に軽く使用されるデモで低優先度を選択します。",
    "xpack.ml.trainedModels.modelsList.startDeployment.priorityLabel": "優先度",
    "xpack.ml.trainedModels.modelsList.startDeployment.priorityLegend": "優先度選択",
    "xpack.ml.trainedModels.modelsList.startDeployment.startButton": "開始",
    "xpack.ml.trainedModels.modelsList.startDeployment.threadsPerAllocationHelp": "大きくすると、推論の待ち時間を改善します。",
    "xpack.ml.trainedModels.modelsList.startDeployment.threadsPerAllocationLabel": "割り当てごとのスレッド",
    "xpack.ml.trainedModels.modelsList.startDeployment.threadsPerAllocationLegend": "割り当てセレクターごとのスレッド",
    "xpack.ml.trainedModels.modelsList.startDeployment.updateButton": "更新",
    "xpack.ml.trainedModels.modelsList.startDeployment.viewElserDocLink": "ドキュメンテーションを表示",
    "xpack.ml.trainedModels.modelsList.startFailed": "\"{modelId}\"の開始に失敗しました",
@@ -29962,24 +29962,10 @@
    "xpack.ml.trainedModels.modelsList.selectedModelsMessage": "{modelsCount, plural, other {# 个模型}}已选择",
    "xpack.ml.trainedModels.modelsList.startDeployment.cancelButton": "取消",
    "xpack.ml.trainedModels.modelsList.startDeployment.deploymentIdError": "已存在具有此 ID 的部署。",
    "xpack.ml.trainedModels.modelsList.startDeployment.deploymentIdHelp": "为模型部署指定唯一标识符。",
    "xpack.ml.trainedModels.modelsList.startDeployment.deploymentIdLabel": "部署 ID",
    "xpack.ml.trainedModels.modelsList.startDeployment.docLinkTitle": "了解详情",
    "xpack.ml.trainedModels.modelsList.startDeployment.lowPriorityLabel": "低",
    "xpack.ml.trainedModels.modelsList.startDeployment.maxNumOfProcessorsWarning": "产品的分配次数和每次分配的线程数应小于 ML 节点上处理器的总数。",
    "xpack.ml.trainedModels.modelsList.startDeployment.modalTitle": "开始 {modelId} 部署",
    "xpack.ml.trainedModels.modelsList.startDeployment.normalPriorityLabel": "正常",
    "xpack.ml.trainedModels.modelsList.startDeployment.numbersOfAllocationsHelp": "增加以提高文档采集吞吐量。",
    "xpack.ml.trainedModels.modelsList.startDeployment.numbersOfAllocationsLabel": "分配次数",
    "xpack.ml.trainedModels.modelsList.startDeployment.numbersOfAllocationsMaxError": "不能超过 {max} - ML 处理器的总数。",
    "xpack.ml.trainedModels.modelsList.startDeployment.numbersOfAllocationsMinError": "至少需要一次分配。",
    "xpack.ml.trainedModels.modelsList.startDeployment.priorityHelp": "为其中的每个模型将极少使用的演示选择低优先级。",
    "xpack.ml.trainedModels.modelsList.startDeployment.priorityLabel": "优先级",
    "xpack.ml.trainedModels.modelsList.startDeployment.priorityLegend": "优先级选择器",
    "xpack.ml.trainedModels.modelsList.startDeployment.startButton": "启动",
    "xpack.ml.trainedModels.modelsList.startDeployment.threadsPerAllocationHelp": "增加以缩短推理延迟。",
    "xpack.ml.trainedModels.modelsList.startDeployment.threadsPerAllocationLabel": "每次分配的线程数",
    "xpack.ml.trainedModels.modelsList.startDeployment.threadsPerAllocationLegend": "每次分配的线程数选择器",
    "xpack.ml.trainedModels.modelsList.startDeployment.updateButton": "更新",
    "xpack.ml.trainedModels.modelsList.startDeployment.viewElserDocLink": "查看文档",
    "xpack.ml.trainedModels.modelsList.startFailed": "无法启动“{modelId}”",
@@ -129,6 +129,7 @@ export default function ({ getService }: FtrProviderContext) {
       await ml.securityUI.loginAsMlViewer();
       await ml.navigation.navigateToTrainedModels();
       await ml.commonUI.waitForRefreshButtonEnabled();
+      await ml.trainedModels.showAllModels();
     });
 
     after(async () => {

@@ -165,6 +166,7 @@ export default function ({ getService }: FtrProviderContext) {
       await ml.securityUI.loginAsMlPowerUser();
       await ml.navigation.navigateToTrainedModels();
       await ml.commonUI.waitForRefreshButtonEnabled();
+      await ml.trainedModels.showAllModels();
     });
 
     after(async () => {
@@ -493,9 +495,9 @@ export default function ({ getService }: FtrProviderContext) {
 
       it(`starts deployment of the imported model ${model.id}`, async () => {
         await ml.trainedModelsTable.startDeploymentWithParams(model.id, {
-          priority: 'normal',
-          numOfAllocations: 1,
-          threadsPerAllocation: 2,
+          vCPULevel: 'medium',
+          optimized: 'optimizedForSearch',
+          adaptiveResources: false,
         });
         await ml.trainedModelsTable.assertModelDeleteActionButtonEnabled(model.id, false);
       });
@@ -167,6 +167,20 @@ export function TrainedModelsProvider({ getService }: FtrProviderContext, mlComm
         await testSubjects.existOrFail('mlDeleteSpaceAwareItemCheckModalOverlay');
       });
     },
 
+    async assertShowAllSelected(expectChecked: boolean) {
+      expect(await testSubjects.isEuiSwitchChecked(`mlModelsShowAllSwitch`)).to.eql(
+        expectChecked,
+        `Expected the "Show all" control to be ${expectChecked ? 'enabled' : 'disabled'}`
+      );
+    },
+
+    async showAllModels() {
+      await retry.tryForTime(3_000, async () => {
+        await mlCommonUI.toggleSwitchIfNeeded('mlModelsShowAllSwitch', true);
+        await this.assertShowAllSelected(true);
+      });
+    },
   };
 }
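`showAllModels` above wraps a toggle-and-assert sequence in `retry.tryForTime`, re-running it until the assertion passes or the time budget runs out. A standalone sketch of that retry pattern — this `tryForTime` is a stand-in for the FTR retry service, not its actual implementation:

```typescript
// Re-run fn until it stops throwing or the timeout elapses; rethrow the last
// error once time is up.
async function tryForTime(timeoutMs: number, fn: () => Promise<void>): Promise<void> {
  const start = Date.now();
  for (;;) {
    try {
      await fn();
      return;
    } catch (e) {
      if (Date.now() - start > timeoutMs) throw e;
      await new Promise((resolve) => setTimeout(resolve, 50)); // pause between attempts
    }
  }
}

// Usage: an action whose assertion fails once, then passes on the next attempt.
let attempts = 0;
void tryForTime(3_000, async () => {
  attempts += 1;
  if (attempts < 2) throw new Error('switch not toggled yet');
}).then(() => console.log(`succeeded after ${attempts} attempts`));
```

Pairing the toggle with an assertion inside the retry body is what makes the helper robust: a flaky UI state simply triggers another attempt instead of failing the test.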
@@ -504,49 +504,80 @@ export function TrainedModelsTableProvider(
       await this.assertDeployModelFlyoutExists();
     }
 
-    async assertNumOfAllocations(expectedValue: number) {
-      const actualValue = await testSubjects.getAttribute(
-        'mlModelsStartDeploymentModalNumOfAllocations',
-        'value'
-      );
-      expect(actualValue).to.eql(
-        expectedValue,
-        `Expected number of allocations to equal ${expectedValue}, got ${actualValue}`
-      );
-    }
+    async assertOptimizedFor(expectedValue: 'optimizedForIngest' | 'optimizedForSearch') {
+      const element = await testSubjects.find(
+        `mlModelsStartDeploymentModalOptimized_${expectedValue}`
+      );
+      const inputElement = await element.findByTagName('input');
+      const isChecked = await inputElement.getAttribute('checked');
+
+      expect(isChecked).to.eql(
+        'true',
+        `Expected optimized for ${expectedValue} to be selected, got ${isChecked}`
+      );
+    }
 
-    public async setNumOfAllocations(value: number) {
-      await testSubjects.setValue('mlModelsStartDeploymentModalNumOfAllocations', value.toString());
-      await this.assertNumOfAllocations(value);
-    }
+    public async setOptimizedFor(optimized: 'optimizedForIngest' | 'optimizedForSearch') {
+      const element = await testSubjects.find(`mlModelsStartDeploymentModalOptimized_${optimized}`);
+      await element.click();
+      await this.assertOptimizedFor(optimized);
+    }
 
-    public async setPriority(value: 'low' | 'normal') {
-      await mlCommonUI.selectButtonGroupValue(
-        'mlModelsStartDeploymentModalPriority',
-        value.toString(),
-        value === 'normal'
-          ? 'mlModelsStartDeploymentModalNormalPriority'
-          : 'mlModelsStartDeploymentModalLowPriority'
-      );
-    }
+    public async setVCPULevel(value: 'low' | 'medium' | 'high') {
+      const valuesMap = {
+        low: 0.5,
+        medium: 1.5,
+        high: 2.5,
+      };
+      await mlCommonUI.setSliderValue('mlModelsStartDeploymentModalVCPULevel', valuesMap[value]);
+      await mlCommonUI.assertSliderValue('mlModelsStartDeploymentModalVCPULevel', valuesMap[value]);
+    }
 
-    public async setThreadsPerAllocation(value: number) {
-      await mlCommonUI.selectButtonGroupValue(
-        'mlModelsStartDeploymentModalThreadsPerAllocation',
-        value.toString(),
-        `mlModelsStartDeploymentModalThreadsPerAllocation_${value}`
-      );
-    }
+    public async assertAdvancedConfigurationOpen(expectedValue: boolean) {
+      await retry.tryForTime(5 * 1000, async () => {
+        const panelElement = await testSubjects.find(
+          'mlModelsStartDeploymentModalAdvancedConfiguration'
+        );
+        const isOpen = await panelElement.elementHasClass('euiAccordion-isOpen');
+
+        expect(isOpen).to.eql(
+          expectedValue,
+          `Expected Advanced configuration to be ${expectedValue ? 'open' : 'closed'}`
+        );
+      });
+    }
+
+    public async toggleAdvancedConfiguration(open: boolean) {
+      const panelElement = await testSubjects.find(
+        'mlModelsStartDeploymentModalAdvancedConfiguration'
+      );
+      const toggleButton = await panelElement.findByTagName('button');
+      await toggleButton.click();
+      await this.assertAdvancedConfigurationOpen(open);
+    }
 
     public async startDeploymentWithParams(
       modelId: string,
-      params: { priority: 'low' | 'normal'; numOfAllocations: number; threadsPerAllocation: number }
+      params: {
+        optimized: 'optimizedForIngest' | 'optimizedForSearch';
+        vCPULevel?: 'low' | 'medium' | 'high';
+        adaptiveResources?: boolean;
+      }
     ) {
       await this.openStartDeploymentModal(modelId);
 
-      await this.setPriority(params.priority);
-      await this.setNumOfAllocations(params.numOfAllocations);
-      await this.setThreadsPerAllocation(params.threadsPerAllocation);
+      await this.setOptimizedFor(params.optimized);
+
+      const hasAdvancedConfiguration =
+        params.vCPULevel !== undefined || params.adaptiveResources !== undefined;
+
+      if (hasAdvancedConfiguration) {
+        await this.toggleAdvancedConfiguration(true);
+      }
+
+      if (params.vCPULevel) {
+        await this.setVCPULevel(params.vCPULevel);
+      }
+
       await testSubjects.click('mlModelsStartDeploymentModalStartButton');
       await this.assertStartDeploymentModalExists(false);
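Behind the low/medium/high slider levels exercised above, the PR description defines cluster-dependent vCPU ranges: on cloud with ML autoscaling they are 1-2, 2-32, and 33-max, while on-prem (or cloud without autoscaling) they derive from `total_ml_processors` and `max_single_ml_node_processors` — e.g. 32 and 16 give 1-2, 3-16, 17-32. A hedged sketch of the on-prem derivation for that worked example; the function and its exact breakpoints are illustrative, not the actual Kibana implementation:

```typescript
type VCpuLevel = 'low' | 'medium' | 'high';

// Illustrative derivation of vCPU ranges for clusters without ML autoscaling,
// from the hardware limits reported by the ML info endpoint.
function getVCpuRange(
  level: VCpuLevel,
  totalMlProcessors: number,
  maxSingleMlNodeProcessors: number
): { min: number; max: number } {
  switch (level) {
    case 'low':
      return { min: 1, max: 2 };
    case 'medium':
      // Medium is capped by what a single ML node can provide.
      return { min: 3, max: Math.min(maxSingleMlNodeProcessors, totalMlProcessors) };
    case 'high':
      return { min: maxSingleMlNodeProcessors + 1, max: totalMlProcessors };
    default:
      throw new Error(`unknown level: ${level}`);
  }
}

// Worked example from the PR description: total_ml_processors = 32,
// max_single_ml_node_processors = 16 -> ranges 1-2, 3-16, 17-32.
console.log(
  getVCpuRange('low', 32, 16),
  getVCpuRange('medium', 32, 16),
  getVCpuRange('high', 32, 16)
);
```

With adaptive resources on, these become `min_number_of_allocations`/`max_number_of_allocations` bounds; with them off, the upper bound is used as a static `number_of_allocations`.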