[8.x] [ML] Update vCPUs ranges for start model deployment (#195617) (#196156)

# Backport

This will backport the following commits from `main` to `8.x`:
- [[ML] Update vCPUs ranges for start model deployment
(#195617)](https://github.com/elastic/kibana/pull/195617)

<!--- Backport version: 9.4.3 -->

### Questions?
Please refer to the [Backport tool
documentation](https://github.com/sqren/backport)

<!--BACKPORT [{"author":{"name":"Dima
Arnautov","email":"dmitrii.arnautov@elastic.co"},"sourceCommit":{"committedDate":"2024-10-14T14:38:26Z","message":"[ML]
Update vCPUs ranges for start model deployment (#195617)\n\n##
Summary\r\n\r\n#### Different vCPUs ranges and enabling support for
static allocations\r\nbased on the serverless project type\r\n\r\n- Each
serverless config yml,
e.g.\r\n[search.es.yml](84b3b79a15/config/serverless.es.yml (L61))\r\nnow
contains parameters required for start model
deployment:\r\n\r\n```yml\r\nxpack.ml.nlp:\r\n enabled: true\r\n
modelDeployment:\r\n allowStaticAllocations: true\r\n vCPURange:\r\n
low:\r\n min: 0\r\n max: 2\r\n static: 2\r\n medium:\r\n min: 1\r\n max:
32\r\n static: 32\r\n high:\r\n min: 1\r\n max: 512\r\n static:
512\r\n```\r\n\r\nNote: _There will be no static allocations option for
serverless O11y\r\nand serverless Security._\r\n\r\n#### The minimum
values of vCPUs\r\n\r\n- 0 for the Low usage level on both serverless
and ESS.\r\n- 1 for the Medium and High usage levels on both serverless
and ESS.\r\n \r\n#### The default vCPUs usage levels\r\n- Low in
serverless.\r\n- Medium in ESS and on-prem\r\n\r\n### Checklist\r\n\r\n-
[x] [Unit or
functional\r\ntests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)\r\nwere
updated or added to match the most common
scenarios","sha":"13897083dc8c465a16ddb3856d4e4904b9629610","branchLabelMapping":{"^v9.0.0$":"main","^v8.16.0$":"8.x","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["release_note:enhancement",":ml","v9.0.0","Feature:3rd
Party
Models","Team:ML","ci:project-deploy-elasticsearch","ci:project-deploy-observability","v8.16.0","backport:version"],"title":"[ML]
Update vCPUs ranges for start model deployment
","number":195617,"url":"https://github.com/elastic/kibana/pull/195617","mergeCommit":{"message":"[ML]
Update vCPUs ranges for start model deployment (#195617)\n\n##
Summary\r\n\r\n#### Different vCPUs ranges and enabling support for
static allocations\r\nbased on the serverless project type\r\n\r\n- Each
serverless config yml,
e.g.\r\n[search.es.yml](84b3b79a15/config/serverless.es.yml (L61))\r\nnow
contains parameters required for start model
deployment:\r\n\r\n```yml\r\nxpack.ml.nlp:\r\n enabled: true\r\n
modelDeployment:\r\n allowStaticAllocations: true\r\n vCPURange:\r\n
low:\r\n min: 0\r\n max: 2\r\n static: 2\r\n medium:\r\n min: 1\r\n max:
32\r\n static: 32\r\n high:\r\n min: 1\r\n max: 512\r\n static:
512\r\n```\r\n\r\nNote: _There will be no static allocations option for
serverless O11y\r\nand serverless Security._\r\n\r\n#### The minimum
values of vCPUs\r\n\r\n- 0 for the Low usage level on both serverless
and ESS.\r\n- 1 for the Medium and High usage levels on both serverless
and ESS.\r\n \r\n#### The default vCPUs usage levels\r\n- Low in
serverless.\r\n- Medium in ESS and on-prem\r\n\r\n### Checklist\r\n\r\n-
[x] [Unit or
functional\r\ntests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)\r\nwere
updated or added to match the most common
scenarios","sha":"13897083dc8c465a16ddb3856d4e4904b9629610"}},"sourceBranch":"main","suggestedTargetBranches":["8.x"],"targetPullRequestStates":[{"branch":"main","label":"v9.0.0","branchLabelMappingKey":"^v9.0.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/195617","number":195617,"mergeCommit":{"message":"[ML]
Update vCPUs ranges for start model deployment (#195617)\n\n##
Summary\r\n\r\n#### Different vCPUs ranges and enabling support for
static allocations\r\nbased on the serverless project type\r\n\r\n- Each
serverless config yml,
e.g.\r\n[search.es.yml](84b3b79a15/config/serverless.es.yml (L61))\r\nnow
contains parameters required for start model
deployment:\r\n\r\n```yml\r\nxpack.ml.nlp:\r\n enabled: true\r\n
modelDeployment:\r\n allowStaticAllocations: true\r\n vCPURange:\r\n
low:\r\n min: 0\r\n max: 2\r\n static: 2\r\n medium:\r\n min: 1\r\n max:
32\r\n static: 32\r\n high:\r\n min: 1\r\n max: 512\r\n static:
512\r\n```\r\n\r\nNote: _There will be no static allocations option for
serverless O11y\r\nand serverless Security._\r\n\r\n#### The minimum
values of vCPUs\r\n\r\n- 0 for the Low usage level on both serverless
and ESS.\r\n- 1 for the Medium and High usage levels on both serverless
and ESS.\r\n \r\n#### The default vCPUs usage levels\r\n- Low in
serverless.\r\n- Medium in ESS and on-prem\r\n\r\n### Checklist\r\n\r\n-
[x] [Unit or
functional\r\ntests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)\r\nwere
updated or added to match the most common
scenarios","sha":"13897083dc8c465a16ddb3856d4e4904b9629610"}},{"branch":"8.x","label":"v8.16.0","branchLabelMappingKey":"^v8.16.0$","isSourceBranch":false,"state":"NOT_CREATED"}]}]
BACKPORT-->

Co-authored-by: Dima Arnautov <dmitrii.arnautov@elastic.co>
Commit 107ff84cbf (parent eca46fe4bc), authored by Kibana Machine on 2024-10-15 03:27:52 +11:00 and committed via GitHub.
18 changed files with 493 additions and 75 deletions.

View file

@ -57,7 +57,23 @@ xpack.painless_lab.enabled: false
xpack.ml.ad.enabled: false
xpack.ml.dfa.enabled: false
xpack.ml.nlp.enabled: true
xpack.ml.nlp:
  enabled: true
  modelDeployment:
    allowStaticAllocations: true
    vCPURange:
      low:
        min: 0
        max: 2
        static: 2
      medium:
        min: 1
        max: 32
        static: 32
      high:
        min: 1
        max: 512
        static: 512
xpack.ml.compatibleModuleType: 'search'
data_visualizer.resultLinks.fileBeat.enabled: false

View file

@ -189,7 +189,20 @@ telemetry.labels.serverless: observability
xpack.ml.ad.enabled: true
xpack.ml.dfa.enabled: false
xpack.ml.nlp.enabled: true
xpack.ml.nlp:
  enabled: true
  modelDeployment:
    allowStaticAllocations: false
    vCPURange:
      low:
        min: 0
        max: 2
      medium:
        min: 1
        max: 32
      high:
        min: 1
        max: 128
xpack.ml.compatibleModuleType: 'observability'
# Disable the embedded Dev Console

View file

@ -100,7 +100,20 @@ xpack.fleet.packages:
xpack.ml.ad.enabled: true
xpack.ml.dfa.enabled: true
xpack.ml.nlp.enabled: true
xpack.ml.nlp:
  enabled: true
  modelDeployment:
    allowStaticAllocations: false
    vCPURange:
      low:
        min: 0
        max: 2
      medium:
        min: 1
        max: 32
      high:
        min: 1
        max: 128
xpack.ml.compatibleModuleType: 'security'
# Disable the embedded Dev Console

View file

@ -300,6 +300,16 @@ export default function ({ getService }: PluginFunctionalProviderContext) {
'xpack.ml.ad.enabled (boolean)',
'xpack.ml.dfa.enabled (boolean)',
'xpack.ml.nlp.enabled (boolean)',
'xpack.ml.nlp.modelDeployment.allowStaticAllocations (boolean)',
'xpack.ml.nlp.modelDeployment.vCPURange.high.max (number)',
'xpack.ml.nlp.modelDeployment.vCPURange.high.min (number)',
'xpack.ml.nlp.modelDeployment.vCPURange.high.static (number?)',
'xpack.ml.nlp.modelDeployment.vCPURange.low.max (number)',
'xpack.ml.nlp.modelDeployment.vCPURange.low.min (number)',
'xpack.ml.nlp.modelDeployment.vCPURange.low.static (number?)',
'xpack.ml.nlp.modelDeployment.vCPURange.medium.max (number)',
'xpack.ml.nlp.modelDeployment.vCPURange.medium.min (number)',
'xpack.ml.nlp.modelDeployment.vCPURange.medium.static (number?)',
'xpack.osquery.actionEnabled (boolean?)',
'xpack.remote_clusters.ui.enabled (boolean?)',
/**

View file

@ -20,11 +20,29 @@ export const ML_EXTERNAL_BASE_PATH = '/api/ml';
export type MlFeatures = Record<'ad' | 'dfa' | 'nlp', boolean>;
export type CompatibleModule = 'security' | 'observability' | 'search';
export type ExperimentalFeatures = Record<'ruleFormV2', boolean>;
export interface ModelDeploymentSettings {
allowStaticAllocations: boolean;
vCPURange: Record<
'low' | 'medium' | 'high',
{
min: number;
max: number;
static?: number;
}
>;
}
export interface NLPSettings {
modelDeployment: ModelDeploymentSettings;
}
export interface ConfigSchema {
ad?: { enabled: boolean };
dfa?: { enabled: boolean };
nlp?: { enabled: boolean };
nlp?: {
enabled: boolean;
modelDeployment?: ModelDeploymentSettings;
};
compatibleModuleType?: CompatibleModule;
experimental?: {
ruleFormV2?: { enabled: boolean };
@ -51,3 +69,9 @@ export function initExperimentalFeatures(
experimentalFeatures.ruleFormV2 = config.experimental.ruleFormV2.enabled;
}
}
export function initModelDeploymentSettings(nlpSettings: NLPSettings, config: ConfigSchema) {
if (config.nlp?.modelDeployment !== undefined) {
nlpSettings.modelDeployment = config.nlp.modelDeployment;
}
}
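Worth noting: `initModelDeploymentSettings` replaces the whole `modelDeployment` object rather than deep-merging individual fields. A minimal sketch of the effect, with illustrative literals (the defaults mirror the plugin defaults further down; the override mirrors the o11y/security configs above):

```ts
const nlpSettings: NLPSettings = {
  modelDeployment: {
    allowStaticAllocations: true,
    vCPURange: {
      low: { min: 0, max: 2 },
      medium: { min: 1, max: 16 },
      high: { min: 1, max: 32 },
    },
  },
};

// Config-provided settings win wholesale when present.
initModelDeploymentSettings(nlpSettings, {
  nlp: {
    enabled: true,
    modelDeployment: {
      allowStaticAllocations: false,
      vCPURange: {
        low: { min: 0, max: 2 },
        medium: { min: 1, max: 32 },
        high: { min: 1, max: 128 },
      },
    },
  },
});

// nlpSettings.modelDeployment.allowStaticAllocations -> false
// nlpSettings.modelDeployment.vCPURange.high.max     -> 128
```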

View file

@ -19,13 +19,13 @@ import { KibanaRenderContextProvider } from '@kbn/react-kibana-context-render';
import { StorageContextProvider } from '@kbn/ml-local-storage';
import useLifecycles from 'react-use/lib/useLifecycles';
import useObservable from 'react-use/lib/useObservable';
import type { ExperimentalFeatures, MlFeatures } from '../../common/constants/app';
import type { ExperimentalFeatures, MlFeatures, NLPSettings } from '../../common/constants/app';
import { ML_STORAGE_KEYS } from '../../common/types/storage';
import type { MlSetupDependencies, MlStartDependencies } from '../plugin';
import { setLicenseCache } from './license';
import { MlRouter } from './routing';
import type { PageDependencies } from './routing/router';
import { EnabledFeaturesContextProvider } from './contexts/ml';
import { EnabledFeaturesContextProvider, MlServerInfoContextProvider } from './contexts/ml';
import type { StartServices } from './contexts/kibana';
import { getMlGlobalServices } from './util/get_services';
@ -42,6 +42,7 @@ interface AppProps {
isServerless: boolean;
mlFeatures: MlFeatures;
experimentalFeatures: ExperimentalFeatures;
nlpSettings: NLPSettings;
}
const localStorage = new Storage(window.localStorage);
@ -59,6 +60,7 @@ const App: FC<AppProps> = ({
isServerless,
mlFeatures,
experimentalFeatures,
nlpSettings,
}) => {
const pageDeps: PageDependencies = {
history: appMountParams.history,
@ -142,7 +144,9 @@ const App: FC<AppProps> = ({
showMLNavMenu={chromeStyle === 'classic'}
experimentalFeatures={experimentalFeatures}
>
<MlRouter pageDeps={pageDeps} />
<MlServerInfoContextProvider nlpSettings={nlpSettings}>
<MlRouter pageDeps={pageDeps} />
</MlServerInfoContextProvider>
</EnabledFeaturesContextProvider>
</DatePickerContextProvider>
</StorageContextProvider>
@ -158,7 +162,8 @@ export const renderApp = (
appMountParams: AppMountParameters,
isServerless: boolean,
mlFeatures: MlFeatures,
experimentalFeatures: ExperimentalFeatures
experimentalFeatures: ExperimentalFeatures,
nlpSettings: NLPSettings
) => {
appMountParams.onAppLeave((actions) => actions.default());
@ -170,6 +175,7 @@ export const renderApp = (
isServerless={isServerless}
mlFeatures={mlFeatures}
experimentalFeatures={experimentalFeatures}
nlpSettings={nlpSettings}
/>,
appMountParams.element
);

View file

@ -7,3 +7,4 @@
export { DataSourceContextProvider, useDataSource } from './data_source_context';
export { EnabledFeaturesContextProvider, useEnabledFeatures } from './serverless_context';
export { MlServerInfoContextProvider, useMlServerInfo } from './ml_server_info_context';

View file

@ -0,0 +1,39 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import React, { type FC, type PropsWithChildren, createContext, useContext } from 'react';
import type { NLPSettings } from '../../../../common/constants/app';
export interface MlServerInfoContextValue {
// TODO add ML server info
nlpSettings: NLPSettings;
}
export const MlServerInfoContext = createContext<MlServerInfoContextValue | undefined>(undefined);
export const MlServerInfoContextProvider: FC<PropsWithChildren<MlServerInfoContextValue>> = ({
children,
nlpSettings,
}) => {
return (
<MlServerInfoContext.Provider
value={{
nlpSettings,
}}
>
{children}
</MlServerInfoContext.Provider>
);
};
export function useMlServerInfo() {
const context = useContext(MlServerInfoContext);
if (context === undefined) {
throw new Error('useMlServerInfo must be used within a MlServerInfoContextProvider');
}
return context;
}
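For context, a hypothetical consumer of the hook (component name, markup, and import path are illustrative):

```tsx
import React from 'react';
import { useMlServerInfo } from '../contexts/ml';

// Hypothetical component: renders the configured vCPU range per usage level.
export const VCpuRanges: React.FC = () => {
  const { nlpSettings } = useMlServerInfo();
  const { vCPURange } = nlpSettings.modelDeployment;

  return (
    <ul>
      {(['low', 'medium', 'high'] as const).map((level) => (
        <li key={level}>
          {level}: {vCPURange[level].min}–{vCPURange[level].max} vCPUs
        </li>
      ))}
    </ul>
  );
};
```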

View file

@ -44,23 +44,82 @@ describe('DeploymentParamsMapper', () => {
it('should get correct VCU levels', () => {
expect(mapper.getVCURange('low')).toEqual({
min: 8,
min: 0,
max: 16,
static: 16,
});
expect(mapper.getVCURange('medium')).toEqual({
min: 24,
min: 8,
max: 256,
static: 256,
});
expect(mapper.getVCURange('high')).toEqual({
min: 264,
max: 4000,
static: 800,
min: 8,
max: 4096,
static: 4096,
});
});
it('should enforce adaptive allocations', () => {
it('maps UI params to API correctly', () => {
expect(
mapper.mapUiToApiDeploymentParams({
deploymentId: 'test-deployment',
optimized: 'optimizedForSearch',
adaptiveResources: false,
vCPUUsage: 'low',
})
).toEqual({
number_of_allocations: 1,
deployment_id: 'test-deployment',
model_id: 'test-model',
priority: 'normal',
threads_per_allocation: 2,
});
expect(
mapper.mapUiToApiDeploymentParams({
deploymentId: 'test-deployment',
optimized: 'optimizedForIngest',
adaptiveResources: false,
vCPUUsage: 'low',
})
).toEqual({
deployment_id: 'test-deployment',
model_id: 'test-model',
priority: 'normal',
threads_per_allocation: 1,
number_of_allocations: 2,
});
});
it('overrides vCPUs levels and enforces adaptive allocations if static support is not configured', () => {
mapper = new DeploymentParamsMapper(modelId, mlServerLimits, cloudInfo, false, {
modelDeployment: {
allowStaticAllocations: false,
vCPURange: {
low: { min: 0, max: 2, static: 2 },
medium: { min: 1, max: 32, static: 32 },
high: { min: 1, max: 128, static: 128 },
},
},
});
expect(mapper.getVCURange('low')).toEqual({
min: 0,
max: 16,
static: 16,
});
expect(mapper.getVCURange('medium')).toEqual({
min: 8,
max: 256,
static: 256,
});
expect(mapper.getVCURange('high')).toEqual({
min: 8,
max: 1024,
static: 1024,
});
expect(
mapper.mapUiToApiDeploymentParams({
deploymentId: 'test-deployment',
@ -72,7 +131,7 @@ describe('DeploymentParamsMapper', () => {
adaptive_allocations: {
enabled: true,
max_number_of_allocations: 1,
min_number_of_allocations: 1,
min_number_of_allocations: 0,
},
deployment_id: 'test-deployment',
model_id: 'test-model',
@ -88,15 +147,15 @@ describe('DeploymentParamsMapper', () => {
vCPUUsage: 'low',
})
).toEqual({
adaptive_allocations: {
enabled: true,
max_number_of_allocations: 2,
min_number_of_allocations: 1,
},
deployment_id: 'test-deployment',
model_id: 'test-model',
priority: 'normal',
threads_per_allocation: 1,
adaptive_allocations: {
enabled: true,
max_number_of_allocations: 2,
min_number_of_allocations: 0,
},
});
});
});
@ -468,7 +527,7 @@ describe('DeploymentParamsMapper', () => {
threads_per_allocation: 2,
adaptive_allocations: {
enabled: true,
min_number_of_allocations: 1,
min_number_of_allocations: 0,
max_number_of_allocations: 1,
},
});
@ -507,7 +566,7 @@ describe('DeploymentParamsMapper', () => {
adaptive_allocations: {
enabled: true,
max_number_of_allocations: 12499,
min_number_of_allocations: 4,
min_number_of_allocations: 1,
},
});
@ -525,7 +584,7 @@ describe('DeploymentParamsMapper', () => {
threads_per_allocation: 1,
adaptive_allocations: {
enabled: true,
min_number_of_allocations: 1,
min_number_of_allocations: 0,
max_number_of_allocations: 2,
},
});
@ -544,7 +603,7 @@ describe('DeploymentParamsMapper', () => {
threads_per_allocation: 1,
adaptive_allocations: {
enabled: true,
min_number_of_allocations: 3,
min_number_of_allocations: 1,
max_number_of_allocations: 32,
},
});
@ -563,7 +622,7 @@ describe('DeploymentParamsMapper', () => {
threads_per_allocation: 1,
adaptive_allocations: {
enabled: true,
min_number_of_allocations: 33,
min_number_of_allocations: 1,
max_number_of_allocations: 99999,
},
});

View file

@ -6,6 +6,7 @@
*/
import type { MlStartTrainedModelDeploymentRequest } from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import type { NLPSettings } from '../../../common/constants/app';
import type { TrainedModelDeploymentStatsResponse } from '../../../common/types/trained_models';
import type { CloudInfo } from '../services/ml_server_info';
import type { MlServerLimits } from '../../../common/types/ml_server_info';
@ -17,16 +18,16 @@ export type MlStartTrainedModelDeploymentRequestNew = MlStartTrainedModelDeploym
const THREADS_MAX_EXPONENT = 5;
// TODO set to 0 when https://github.com/elastic/elasticsearch/pull/113455 is merged
const MIN_SUPPORTED_NUMBER_OF_ALLOCATIONS = 1;
type VCPUBreakpoints = Record<
DeploymentParamsUI['vCPUUsage'],
{
min: number;
max: number;
/** Static value is used for the number of vCPUs when the adaptive resources are disabled */
static: number;
/**
* Static value is used for the number of vCPUs when the adaptive resources are disabled.
* Not allowed in certain environments.
*/
static?: number;
}
>;
@ -39,26 +40,28 @@ export class DeploymentParamsMapper {
private readonly threadingParamsValues: number[];
/**
* vCPUs level breakpoints for cloud cluster with enabled ML autoscaling
* vCPUs level breakpoints for cloud cluster with enabled ML autoscaling.
* TODO resolve dynamically when Control Pane exposes the vCPUs range.
*/
private readonly autoscalingVCPUBreakpoints: VCPUBreakpoints = {
low: { min: MIN_SUPPORTED_NUMBER_OF_ALLOCATIONS, max: 2, static: 2 },
medium: { min: 3, max: 32, static: 32 },
high: { min: 33, max: 99999, static: 100 },
low: { min: this.minAllowedNumberOfAllocation, max: 2, static: 2 },
medium: { min: 1, max: 32, static: 32 },
high: { min: 1, max: 99999, static: 128 },
};
/**
* vCPUs level breakpoints for serverless projects
* Default vCPUs level breakpoints for serverless projects.
* Can be overridden by the project specific settings.
*/
private readonly serverlessVCPUBreakpoints: VCPUBreakpoints = {
low: { min: MIN_SUPPORTED_NUMBER_OF_ALLOCATIONS, max: 2, static: 2 },
medium: { min: 3, max: 32, static: 32 },
high: { min: 33, max: 500, static: 100 },
low: { min: this.minAllowedNumberOfAllocation, max: 2, static: 2 },
medium: { min: 1, max: 32, static: 32 },
high: { min: 1, max: 512, static: 512 },
};
/**
* vCPUs level breakpoints based on the ML server limits.
* Either on-prem or cloud with disabled ML autoscaling
* Either on-prem or cloud with disabled ML autoscaling.
*/
private readonly hardwareVCPUBreakpoints: VCPUBreakpoints;
@ -67,12 +70,26 @@ export class DeploymentParamsMapper {
*/
private readonly vCpuBreakpoints: VCPUBreakpoints;
/**
* Gets the min allowed number of allocations.
* - 0 for serverless and ESS with enabled autoscaling.
* - 1 otherwise
* @private
*/
private get minAllowedNumberOfAllocation(): number {
return !this.showNodeInfo || this.cloudInfo.isMlAutoscalingEnabled ? 0 : 1;
}
constructor(
private readonly modelId: string,
private readonly mlServerLimits: MlServerLimits,
private readonly cloudInfo: CloudInfo,
private readonly showNodeInfo: boolean
private readonly showNodeInfo: boolean,
private readonly nlpSettings?: NLPSettings
) {
/**
* Initial value can be different for serverless and ESS with autoscaling.
*/
const maxSingleMlNodeProcessors = this.mlServerLimits.max_single_ml_node_processors;
this.threadingParamsValues = new Array(THREADS_MAX_EXPONENT)
@ -83,7 +100,7 @@ export class DeploymentParamsMapper {
const mediumValue = this.mlServerLimits!.total_ml_processors! / 2;
this.hardwareVCPUBreakpoints = {
low: { min: MIN_SUPPORTED_NUMBER_OF_ALLOCATIONS, max: 2, static: 2 },
low: { min: this.minAllowedNumberOfAllocation, max: 2, static: 2 },
medium: { min: Math.min(3, mediumValue), max: mediumValue, static: mediumValue },
high: {
min: mediumValue + 1,
@ -94,6 +111,10 @@ export class DeploymentParamsMapper {
if (!this.showNodeInfo) {
this.vCpuBreakpoints = this.serverlessVCPUBreakpoints;
if (this.nlpSettings?.modelDeployment) {
// Apply project specific overrides
this.vCpuBreakpoints = this.nlpSettings.modelDeployment.vCPURange;
}
} else if (this.cloudInfo.isMlAutoscalingEnabled) {
this.vCpuBreakpoints = this.autoscalingVCPUBreakpoints;
} else {
@ -108,6 +129,11 @@ export class DeploymentParamsMapper {
return input.vCPUUsage === 'low' ? 2 : Math.max(...this.threadingParamsValues);
}
/**
* Returns allocation values accounting for the number of threads per allocation.
* @param params
* @private
*/
private getAllocationsParams(
params: DeploymentParamsUI
): Pick<MlStartTrainedModelDeploymentRequestNew, 'number_of_allocations'> &
@ -126,7 +152,7 @@ export class DeploymentParamsMapper {
min_number_of_allocations:
Math.floor(levelValues.min / threadsPerAllocation) ||
// in any env, allow scale down to 0 only for "low" vCPU usage
(params.vCPUUsage === 'low' ? MIN_SUPPORTED_NUMBER_OF_ALLOCATIONS : 1),
(params.vCPUUsage === 'low' ? this.minAllowedNumberOfAllocation : 1),
max_number_of_allocations: maxValue,
};
}
@ -148,7 +174,7 @@ export class DeploymentParamsMapper {
public getVCURange(vCPUUsage: DeploymentParamsUI['vCPUUsage']) {
// general purpose (c6gd) 1VCU = 1GB RAM / 0.5 vCPU
// vector optimized (r6gd) 1VCU = 1GB RAM / 0.125 vCPU
const vCPUBreakpoints = this.serverlessVCPUBreakpoints[vCPUUsage];
const vCPUBreakpoints = this.vCpuBreakpoints[vCPUUsage];
return Object.entries(vCPUBreakpoints).reduce((acc, [key, val]) => {
// as we can't retrieve Search project configuration, we assume that the vector optimized instance is used
@ -165,8 +191,8 @@ export class DeploymentParamsMapper {
input: DeploymentParamsUI
): MlStartTrainedModelDeploymentRequestNew {
const resultInput: DeploymentParamsUI = Object.create(input);
if (!this.showNodeInfo) {
// Enforce adaptive resources for serverless
if (!this.showNodeInfo && this.nlpSettings?.modelDeployment.allowStaticAllocations === false) {
// Enforce adaptive resources for serverless projects with prohibited static allocations
resultInput.adaptiveResources = true;
}
@ -177,7 +203,7 @@ export class DeploymentParamsMapper {
deployment_id: resultInput.deploymentId,
priority: 'normal',
threads_per_allocation: this.getNumberOfThreads(resultInput),
...(resultInput.adaptiveResources || !this.showNodeInfo
...(resultInput.adaptiveResources
? {
adaptive_allocations: {
enabled: true,
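To make the new minimum-allocation rule easier to follow, here is a standalone restatement of the `minAllowedNumberOfAllocation` getter above (a sketch, not the class itself):

```ts
// 0 for serverless (no node info) and for ESS with ML autoscaling enabled; 1 otherwise.
function minAllowedAllocations(showNodeInfo: boolean, isMlAutoscalingEnabled: boolean): number {
  return !showNodeInfo || isMlAutoscalingEnabled ? 0 : 1;
}

minAllowedAllocations(false, false); // serverless               -> 0
minAllowedAllocations(true, true);   // ESS with autoscaling     -> 0
minAllowedAllocations(true, false);  // on-prem / no autoscaling -> 1
```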

View file

@ -41,6 +41,7 @@ import type { CoreStart, OverlayStart } from '@kbn/core/public';
import { css } from '@emotion/react';
import { toMountPoint } from '@kbn/react-kibana-mount';
import { dictionaryValidator } from '@kbn/ml-validators';
import type { NLPSettings } from '../../../common/constants/app';
import type { TrainedModelDeploymentStatsResponse } from '../../../common/types/trained_models';
import { type CloudInfo, getNewJobLimits } from '../services/ml_server_info';
import type { ModelItem } from './models_list';
@ -220,7 +221,7 @@ export const DeploymentSetup: FC<DeploymentSetupProps> = ({
const helperText = useMemo<string | undefined>(() => {
const vcpuRange = deploymentParamsMapper.getVCPURange(config.vCPUUsage);
if (cloudInfo.isCloud && cloudInfo.isMlAutoscalingEnabled) {
if (cloudInfo.isCloud && cloudInfo.isMlAutoscalingEnabled && showNodeInfo) {
// Running in cloud with ML autoscaling enabled
if (config.adaptiveResources) {
// With adaptive resources
@ -285,7 +286,7 @@ export const DeploymentSetup: FC<DeploymentSetupProps> = ({
}
}
} else if (
(cloudInfo.isCloud && !cloudInfo.isMlAutoscalingEnabled) ||
(cloudInfo.isCloud && !cloudInfo.isMlAutoscalingEnabled && showNodeInfo) ||
(!cloudInfo.isCloud && showNodeInfo)
) {
// Running in cloud with autoscaling disabled or on-prem
@ -352,7 +353,7 @@ export const DeploymentSetup: FC<DeploymentSetupProps> = ({
}
}
} else if (!showNodeInfo) {
// Running a Search project in serverless
// Running in serverless
const vcuRange = deploymentParamsMapper.getVCURange(config.vCPUUsage);
if (config.adaptiveResources) {
@ -386,6 +387,29 @@ export const DeploymentSetup: FC<DeploymentSetupProps> = ({
}
);
}
} else {
// Static allocations are allowed for Search projects
switch (config.vCPUUsage) {
case 'low':
return i18n.translate(
'xpack.ml.trainedModels.modelsList.startDeployment.serverless.lowCpuStaticHelp',
{
defaultMessage:
'This level sets resources to {staticVCUs, plural, one {VCU} other {# VCUs}}, which may be suitable for development, testing, and demos depending on your parameters. It is not recommended for production use.',
values: { staticVCUs: vcuRange.static },
}
);
case 'medium':
case 'high':
return i18n.translate(
'xpack.ml.trainedModels.modelsList.startDeployment.serverless.mediumCpuStaticHelp',
{
defaultMessage:
'Your model will consume {staticVCUs, plural, one {VCU} other {# VCUs}}, even when not in use.',
values: { staticVCUs: vcuRange.static },
}
);
}
}
}
}, [
@ -570,8 +594,8 @@ export const DeploymentSetup: FC<DeploymentSetupProps> = ({
<EuiSpacer size={'s'} />
<EuiFormHelpText id={'vCpuRangeHelp'}>
<EuiCallOut size="s">
<p>{helperText}</p>
<EuiCallOut size="s" data-test-subj="mlModelsStartDeploymentModalVCPUHelperText">
{helperText}
</EuiCallOut>
</EuiFormHelpText>
</EuiPanel>
@ -630,6 +654,7 @@ interface StartDeploymentModalProps {
cloudInfo: CloudInfo;
deploymentParamsMapper: DeploymentParamsMapper;
showNodeInfo: boolean;
nlpSettings: NLPSettings;
}
/**
@ -645,6 +670,7 @@ export const StartUpdateDeploymentModal: FC<StartDeploymentModalProps> = ({
cloudInfo,
deploymentParamsMapper,
showNodeInfo,
nlpSettings,
}) => {
const isUpdate = !!initialParams;
@ -653,20 +679,22 @@ export const StartUpdateDeploymentModal: FC<StartDeploymentModalProps> = ({
deploymentParamsMapper.mapApiToUiDeploymentParams(v)
);
const defaultVCPUUsage: DeploymentParamsUI['vCPUUsage'] = showNodeInfo ? 'medium' : 'low';
return uiParams?.some((v) => v.optimized === 'optimizedForIngest')
? {
deploymentId: `${model.model_id}_search`,
optimized: 'optimizedForSearch',
vCPUUsage: 'medium',
vCPUUsage: defaultVCPUUsage,
adaptiveResources: true,
}
: {
deploymentId: `${model.model_id}_ingest`,
optimized: 'optimizedForIngest',
vCPUUsage: 'medium',
vCPUUsage: defaultVCPUUsage,
adaptiveResources: true,
};
}, [deploymentParamsMapper, model.model_id, model.stats?.deployment_stats]);
}, [deploymentParamsMapper, model.model_id, model.stats?.deployment_stats, showNodeInfo]);
const [config, setConfig] = useState<DeploymentParamsUI>(initialParams ?? getDefaultParams());
@ -721,7 +749,9 @@ export const StartUpdateDeploymentModal: FC<StartDeploymentModalProps> = ({
onConfigChange={setConfig}
errors={errors}
isUpdate={isUpdate}
disableAdaptiveResourcesControl={!showNodeInfo}
disableAdaptiveResourcesControl={
showNodeInfo ? false : !nlpSettings.modelDeployment.allowStaticAllocations
}
deploymentsParams={model.stats?.deployment_stats.reduce<
Record<string, DeploymentParamsUI>
>((acc, curr) => {
@ -811,7 +841,8 @@ export const getUserInputModelDeploymentParamsProvider =
startServices: Pick<CoreStart, 'analytics' | 'i18n' | 'theme'>,
startModelDeploymentDocUrl: string,
cloudInfo: CloudInfo,
showNodeInfo: boolean
showNodeInfo: boolean,
nlpSettings: NLPSettings
) =>
(
model: ModelItem,
@ -822,7 +853,8 @@ export const getUserInputModelDeploymentParamsProvider =
model.model_id,
getNewJobLimits(),
cloudInfo,
showNodeInfo
showNodeInfo,
nlpSettings
);
const params = initialParams
@ -834,6 +866,7 @@ export const getUserInputModelDeploymentParamsProvider =
const modalSession = overlays.openModal(
toMountPoint(
<StartUpdateDeploymentModal
nlpSettings={nlpSettings}
showNodeInfo={showNodeInfo}
deploymentParamsMapper={deploymentParamsMapper}
cloudInfo={cloudInfo}

View file

@ -20,7 +20,7 @@ import {
getAnalysisType,
type DataFrameAnalysisConfigType,
} from '@kbn/ml-data-frame-analytics-utils';
import { useEnabledFeatures } from '../contexts/ml';
import { useEnabledFeatures, useMlServerInfo } from '../contexts/ml';
import { useTrainedModelsApiService } from '../services/ml_api_service/trained_models';
import { getUserConfirmationProvider } from './force_stop_dialog';
import { useToastNotificationService } from '../services/toast_notification_service';
@ -66,6 +66,7 @@ export function useModelActions({
} = useMlKibana();
const { showNodeInfo } = useEnabledFeatures();
const { nlpSettings } = useMlServerInfo();
const cloudInfo = useCloudCheck();
@ -124,9 +125,10 @@ export function useModelActions({
startServices,
startModelDeploymentDocUrl,
cloudInfo,
showNodeInfo
showNodeInfo,
nlpSettings
),
[overlays, startServices, startModelDeploymentDocUrl, cloudInfo, showNodeInfo]
[overlays, startServices, startModelDeploymentDocUrl, cloudInfo, showNodeInfo, nlpSettings]
);
const isBuiltInModel = useCallback(
@ -214,7 +216,10 @@ export function useModelActions({
},
available: (item) => {
return (
item.model_type === TRAINED_MODEL_TYPE.PYTORCH && item.state === MODEL_STATE.DOWNLOADED
item.model_type === TRAINED_MODEL_TYPE.PYTORCH &&
!!item.state &&
item.state !== MODEL_STATE.DOWNLOADING &&
item.state !== MODEL_STATE.NOT_DOWNLOADED
);
},
onClick: async (item) => {
@ -539,7 +544,7 @@ export function useModelActions({
},
{
name: i18n.translate('xpack.ml.inference.modelsList.testModelActionLabel', {
defaultMessage: 'Test model',
defaultMessage: 'Test',
}),
description: i18n.translate('xpack.ml.inference.modelsList.testModelActionLabel', {
defaultMessage: 'Test model',

View file

@ -68,6 +68,8 @@ import {
type ConfigSchema,
type ExperimentalFeatures,
initExperimentalFeatures,
initModelDeploymentSettings,
type NLPSettings,
} from '../common/constants/app';
import type { ElasticModels } from './application/services/elastic_models_service';
import type { MlApi } from './application/services/ml_api_service';
@ -135,11 +137,31 @@ export class MlPlugin implements Plugin<MlPluginSetup, MlPluginStart> {
private experimentalFeatures: ExperimentalFeatures = {
ruleFormV2: false,
};
private nlpSettings: NLPSettings = {
modelDeployment: {
allowStaticAllocations: true,
vCPURange: {
low: {
min: 0,
max: 2,
},
medium: {
min: 1,
max: 16,
},
high: {
min: 1,
max: 32,
},
},
},
};
constructor(private initializerContext: PluginInitializerContext<ConfigSchema>) {
this.isServerless = initializerContext.env.packageInfo.buildFlavor === 'serverless';
initEnabledFeatures(this.enabledFeatures, initializerContext.config.get());
initExperimentalFeatures(this.experimentalFeatures, initializerContext.config.get());
initModelDeploymentSettings(this.nlpSettings, initializerContext.config.get());
}
setup(
@ -194,7 +216,8 @@ export class MlPlugin implements Plugin<MlPluginSetup, MlPluginStart> {
params,
this.isServerless,
this.enabledFeatures,
this.experimentalFeatures
this.experimentalFeatures,
this.nlpSettings
);
},
});

View file

@ -20,10 +20,30 @@ const compatibleModuleTypeSchema = schema.maybe(
])
);
const vCPURangeSchema = schema.object({
min: schema.number(),
max: schema.number(),
static: schema.maybe(schema.number()),
});
export const configSchema = schema.object({
ad: enabledSchema,
dfa: enabledSchema,
nlp: enabledSchema,
nlp: schema.maybe(
schema.object({
enabled: schema.boolean(),
modelDeployment: schema.maybe(
schema.object({
allowStaticAllocations: schema.boolean(),
vCPURange: schema.object({
low: vCPURangeSchema,
medium: vCPURangeSchema,
high: vCPURangeSchema,
}),
})
),
})
),
compatibleModuleType: compatibleModuleTypeSchema,
experimental: schema.maybe(
schema.object({
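As a sanity check, here is a value that should pass the schema above, mirroring the o11y/security serverless configs. This assumes the omitted top-level keys (`ad`, `dfa`, `compatibleModuleType`, `experimental`) are optional, as their `schema.maybe` wrappers suggest:

```ts
configSchema.validate({
  nlp: {
    enabled: true,
    modelDeployment: {
      allowStaticAllocations: false,
      vCPURange: {
        low: { min: 0, max: 2 }, // `static` is schema.maybe, so it can be omitted
        medium: { min: 1, max: 32 },
        high: { min: 1, max: 128 },
      },
    },
  },
});
```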

View file

@ -33,7 +33,10 @@ export const config: PluginConfigDescriptor<ConfigSchema> = {
exposeToBrowser: {
ad: true,
dfa: true,
nlp: true,
nlp: {
enabled: true,
modelDeployment: true,
},
experimental: true,
},
};

View file

@ -78,6 +78,15 @@ export function TrainedModelsTableProvider(
return rows;
}
/**
* Maps the vCPU level to the corresponding value in the slider.
*/
public readonly vCPULevelValueMap = {
low: 0.5,
medium: 1.5,
high: 2.5,
};
public rowSelector(modelId: string, subSelector?: string) {
const row = `~mlModelsTable > ~row-${modelId}`;
return !subSelector ? row : `${row} > ${subSelector}`;
@ -512,13 +521,25 @@ export function TrainedModelsTableProvider(
}
public async setVCPULevel(value: 'low' | 'medium' | 'high') {
const valuesMap = {
low: 0.5,
medium: 1.5,
high: 2.5,
};
await mlCommonUI.setSliderValue('mlModelsStartDeploymentModalVCPULevel', valuesMap[value]);
await mlCommonUI.assertSliderValue('mlModelsStartDeploymentModalVCPULevel', valuesMap[value]);
await mlCommonUI.setSliderValue(
'mlModelsStartDeploymentModalVCPULevel',
this.vCPULevelValueMap[value]
);
await this.assertVCPULevel(value);
}
public async assertVCPULevel(value: 'low' | 'medium' | 'high') {
await mlCommonUI.assertSliderValue(
'mlModelsStartDeploymentModalVCPULevel',
this.vCPULevelValueMap[value]
);
}
public async assertVCPUHelperText(expectedText: string) {
const helperText = await testSubjects.getVisibleText(
'mlModelsStartDeploymentModalVCPUHelperText'
);
expect(expectedText).to.eql(helperText);
}
public async assertAdvancedConfigurationOpen(expectedValue: boolean) {
@ -544,6 +565,33 @@ export function TrainedModelsTableProvider(
await this.assertAdvancedConfigurationOpen(open);
}
public async assertAdaptiveResourcesSwitchExists(expectExist: boolean) {
if (expectExist) {
await testSubjects.existOrFail('mlModelsStartDeploymentModalAdaptiveResources');
} else {
await testSubjects.missingOrFail('mlModelsStartDeploymentModalAdaptiveResources');
}
}
public async toggleAdaptiveResourcesSwitch(enabled: boolean) {
await mlCommonUI.toggleSwitchIfNeeded(
'mlModelsStartDeploymentModalAdaptiveResources',
enabled
);
await this.assertAdaptiveResourcesSwitchChecked(enabled);
}
public async assertAdaptiveResourcesSwitchChecked(expectedValue: boolean) {
const isChecked = await testSubjects.isEuiSwitchChecked(
'mlModelsStartDeploymentModalAdaptiveResources'
);
expect(isChecked).to.eql(
expectedValue,
`Expected adaptive resources switch to be ${expectedValue ? 'checked' : 'unchecked'}`
);
}
public async startDeploymentWithParams(
modelId: string,
params: {

View file

@ -4,18 +4,28 @@
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { SUPPORTED_TRAINED_MODELS } from '@kbn/test-suites-xpack/functional/services/ml/api';
import { FtrProviderContext } from '../../../ftr_provider_context';
export default function ({ getService, getPageObjects }: FtrProviderContext) {
const ml = getService('ml');
const PageObjects = getPageObjects(['svlCommonPage']);
describe('Trained models list', () => {
describe('Trained models list', function () {
const tinyElser = SUPPORTED_TRAINED_MODELS.TINY_ELSER;
before(async () => {
await PageObjects.svlCommonPage.loginWithPrivilegedRole();
await ml.api.importTrainedModel(tinyElser.name, tinyElser.name);
// Make sure the .ml-stats index is created in advance, see https://github.com/elastic/elasticsearch/issues/65846
await ml.api.assureMlStatsIndexExists();
await ml.api.syncSavedObjects();
});
after(async () => {
await ml.api.deleteAllTrainedModelsES();
});
describe('page navigation', () => {
it('renders trained models list', async () => {
await ml.navigation.navigateToMl();
@ -24,9 +34,42 @@ export default function ({ getService, getPageObjects }: FtrProviderContext) {
await ml.testExecution.logTestStep(
'should display the stats bar and the analytics table with 1 installed trained model and built in elser models in the table'
);
await ml.trainedModels.assertStats(1);
await ml.trainedModels.assertStats(2);
await ml.trainedModelsTable.assertTableIsPopulated();
});
});
describe('trained models table', () => {
it('sets correct VCU ranges for start model deployment', async () => {
await ml.trainedModelsTable.openStartDeploymentModal(tinyElser.name);
await ml.trainedModelsTable.toggleAdvancedConfiguration(true);
await ml.testExecution.logTestStep('should have correct default VCU level');
// Assert that the default selected level is Low
await ml.trainedModelsTable.assertVCPULevel('low');
// Assert VCU levels values
await ml.trainedModelsTable.assertVCPUHelperText(
'This level limits resources to 16 VCUs, which may be suitable for development, testing, and demos depending on your parameters. It is not recommended for production use.'
);
await ml.testExecution.logTestStep(
'should set control to high VCU level and update helper text'
);
await ml.trainedModelsTable.setVCPULevel('high');
await ml.trainedModelsTable.assertVCPUHelperText(
'Your model will scale up to a maximum of 4,096 VCUs per hour based on your search or ingest load. It will automatically scale down when demand decreases, and you only pay for the resources you use.'
);
// Adaptive resources switch should be checked by default
await ml.trainedModelsTable.assertAdaptiveResourcesSwitchChecked(true);
// Static allocations should be allowed for search projects
await ml.trainedModelsTable.toggleAdaptiveResourcesSwitch(false);
await ml.trainedModelsTable.assertVCPUHelperText(
'Your model will consume 4,096 VCUs, even when not in use.'
);
});
});
});
}
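The VCU figures asserted here and in the security-project test below follow from the configured vCPU ranges and the vector-optimized ratio noted in `getVCURange` (1 VCU = 0.125 vCPU, i.e. 8 VCUs per vCPU):

```ts
const VCUS_PER_VCPU = 8; // vector-optimized (r6gd) assumption from getVCURange

2 * VCUS_PER_VCPU;   // low max: 2 vCPUs                    -> 16 VCUs
512 * VCUS_PER_VCPU; // search high static: 512 vCPUs       -> 4096 VCUs
128 * VCUS_PER_VCPU; // o11y/security high max: 128 vCPUs   -> 1024 VCUs
```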

View file

@ -4,6 +4,7 @@
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { SUPPORTED_TRAINED_MODELS } from '@kbn/test-suites-xpack/functional/services/ml/api';
import { ServerlessRoleName } from '../../../../shared/lib';
import { FtrProviderContext } from '../../../ftr_provider_context';
@ -13,11 +14,20 @@ export default function ({ getService, getPageObjects }: FtrProviderContext) {
const PageObjects = getPageObjects(['svlCommonPage']);
describe('Trained models list', function () {
const tinyElser = SUPPORTED_TRAINED_MODELS.TINY_ELSER;
before(async () => {
await PageObjects.svlCommonPage.loginWithRole(ServerlessRoleName.PLATFORM_ENGINEER);
await ml.api.importTrainedModel(tinyElser.name, tinyElser.name);
// Make sure the .ml-stats index is created in advance, see https://github.com/elastic/elasticsearch/issues/65846
await ml.api.assureMlStatsIndexExists();
await ml.api.syncSavedObjects();
});
after(async () => {
await ml.api.deleteAllTrainedModelsES();
});
describe('page navigation', () => {
it('renders trained models list', async () => {
await ml.navigation.navigateToMl();
@ -27,9 +37,35 @@ export default function ({ getService, getPageObjects }: FtrProviderContext) {
await ml.testExecution.logTestStep(
'should display the stats bar and the analytics table with one trained model'
);
await ml.trainedModels.assertStats(1);
await ml.trainedModels.assertStats(2);
await ml.trainedModelsTable.assertTableIsPopulated();
});
});
describe('trained models table', () => {
it('sets correct VCU ranges for start model deployment', async () => {
await ml.trainedModelsTable.openStartDeploymentModal(tinyElser.name);
await ml.trainedModelsTable.toggleAdvancedConfiguration(true);
// Adaptive resources switch should be hidden
await ml.trainedModelsTable.assertAdaptiveResourcesSwitchExists(false);
await ml.testExecution.logTestStep('should have correct default VCU level');
// Assert that the default selected level is Low
await ml.trainedModelsTable.assertVCPULevel('low');
// Assert VCU levels values
await ml.trainedModelsTable.assertVCPUHelperText(
'This level limits resources to 16 VCUs, which may be suitable for development, testing, and demos depending on your parameters. It is not recommended for production use.'
);
await ml.testExecution.logTestStep(
'should set control to high VCU level and update helper text'
);
await ml.trainedModelsTable.setVCPULevel('high');
await ml.trainedModelsTable.assertVCPUHelperText(
'Your model will scale up to a maximum of 1,024 VCUs per hour based on your search or ingest load. It will automatically scale down when demand decreases, and you only pay for the resources you use.'
);
});
});
});
}