[ML] Trained Models: Hide Adaptive Allocations Toggle in Serverless (#224097)

Resolves https://github.com/elastic/kibana/issues/221894. Removes the adaptive allocations toggle in serverless, where adaptive allocations are always enabled. Adjusts the `min_allocations` param per project type: Observability - min: 0; Search - min: 0; Security - min: 1.
2025-06-27 18:51:07 -04:00 · 2025-06-18 15:52:15 +02:00 · 2025-06-18 15:52:15 +02:00 · f79b68d84e
commit f79b68d84e
parent c38269744b
6 changed files with 60 additions and 21 deletions
--- a/config/serverless.es.yml
+++ b/config/serverless.es.yml
@ -85,18 +85,18 @@ xpack.ml.dfa.enabled: false
 xpack.ml.nlp:
  enabled: true
  modelDeployment:
-    allowStaticAllocations: true
+    allowStaticAllocations: false
    vCPURange:
      low:
        min: 0
        max: 2
        static: 2
      medium:
-        min: 1
+        min: 0
        max: 32
        static: 32
      high:
-        min: 1
+        min: 0
        max: 512
        static: 512
 xpack.ml.compatibleModuleType: 'search'
--- a/config/serverless.oblt.yml
+++ b/config/serverless.oblt.yml
@ -119,10 +119,10 @@ xpack.ml.nlp:
        min: 0
        max: 2
      medium:
-        min: 1
+        min: 0
        max: 32
      high:
-        min: 1
+        min: 0
        max: 128
 xpack.ml.compatibleModuleType: 'observability'

--- a/x-pack/platform/plugins/shared/ml/public/application/model_management/deployment_params_mapper.test.ts
+++ b/x-pack/platform/plugins/shared/ml/public/application/model_management/deployment_params_mapper.test.ts
@ -48,12 +48,12 @@ describe('DeploymentParamsMapper', () => {
          static: 16,
        });
        expect(mapper.getVCURange('medium')).toEqual({
-          min: 8,
+          min: 0,
          max: 256,
          static: 256,
        });
        expect(mapper.getVCURange('high')).toEqual({
-          min: 8,
+          min: 0,
          max: 4096,
          static: 4096,
        });
@ -93,6 +93,48 @@ describe('DeploymentParamsMapper', () => {
            number_of_allocations: 2,
          },
        });
+
+        expect(
+          mapper.mapUiToApiDeploymentParams(modelId, {
+            deploymentId: 'test-deployment',
+            optimized: 'optimizedForSearch',
+            adaptiveResources: true,
+            vCPUUsage: 'medium',
+          })
+        ).toEqual({
+          modelId: 'test-model',
+          deploymentParams: {
+            deployment_id: 'test-deployment',
+            priority: 'normal',
+            threads_per_allocation: 16,
+          },
+          adaptiveAllocationsParams: {
+            enabled: true,
+            max_number_of_allocations: 2,
+            min_number_of_allocations: 0,
+          },
+        });
+
+        expect(
+          mapper.mapUiToApiDeploymentParams(modelId, {
+            deploymentId: 'test-deployment',
+            optimized: 'optimizedForIngest',
+            adaptiveResources: true,
+            vCPUUsage: 'high',
+          })
+        ).toEqual({
+          modelId: 'test-model',
+          deploymentParams: {
+            deployment_id: 'test-deployment',
+            priority: 'normal',
+            threads_per_allocation: 1,
+          },
+          adaptiveAllocationsParams: {
+            enabled: true,
+            max_number_of_allocations: 512,
+            min_number_of_allocations: 0,
+          },
+        });
      });

      it('overrides vCPUs levels and enforces adaptive allocations if static support is not configured', () => {
--- a/x-pack/platform/plugins/shared/ml/public/application/model_management/deployment_params_mapper.ts
+++ b/x-pack/platform/plugins/shared/ml/public/application/model_management/deployment_params_mapper.ts
@ -56,8 +56,8 @@ export class DeploymentParamsMapper {
   */
  private readonly serverlessVCPUBreakpoints: VCPUBreakpoints = {
    low: { min: this.minAllowedNumberOfAllocation, max: 2, static: 2 },
-    medium: { min: 1, max: 32, static: 32 },
-    high: { min: 1, max: 512, static: 512 },
+    medium: { min: 0, max: 32, static: 32 },
+    high: { min: 0, max: 512, static: 512 },
  };

  /**
@ -152,8 +152,11 @@ export class DeploymentParamsMapper {
      number_of_allocations: maxValue,
      min_number_of_allocations:
        Math.floor(levelValues.min / threadsPerAllocation) ||
-        // in any env, allow scale down to 0 only for "low" vCPU usage
-        (params.vCPUUsage === 'low' ? this.minAllowedNumberOfAllocation : 1),
+        // For serverless env, always allow scale down to 0
+        // For other envs, allow scale down to 0 only for "low" vCPU usage
+        (this.showNodeInfo === false || params.vCPUUsage === 'low'
+          ? this.minAllowedNumberOfAllocation
+          : 1),
      max_number_of_allocations: maxValue,
    };
  }
--- a/x-pack/platform/plugins/shared/ml/public/application/model_management/deployment_setup.tsx
+++ b/x-pack/platform/plugins/shared/ml/public/application/model_management/deployment_setup.tsx
@ -818,7 +818,7 @@ export const StartUpdateDeploymentModal: FC<StartDeploymentModalProps> = ({
          errors={errors}
          isUpdate={isUpdate}
          disableAdaptiveResourcesControl={
-            showNodeInfo ? false : !nlpSettings.modelDeployment.allowStaticAllocations
+            !showNodeInfo || !nlpSettings.modelDeployment.allowStaticAllocations
          }
          deploymentsParams={
            isModelNotDownloaded || !isNLPModelItem(model)
--- a/x-pack/test_serverless/functional/test_suites/search/ml/trained_models_list.ts
+++ b/x-pack/test_serverless/functional/test_suites/search/ml/trained_models_list.ts
@ -60,15 +60,9 @@ export default function ({ getService, getPageObjects, getPageObject }: FtrProvi
          'Your model will scale up to a maximum of 4,096 VCUs per hour based on your search or ingest load. It will automatically scale down when demand decreases, and you only pay for the resources you use.'
        );

-        // Adaptive resources switch should be checked by default
-        await ml.trainedModelsTable.assertAdaptiveResourcesSwitchChecked(true);
-
-        // Static allocations should be allowed for search projects
-        await ml.trainedModelsTable.toggleAdaptiveResourcesSwitch(false);
-
-        await ml.trainedModelsTable.assertVCPUHelperText(
-          'Your model will consume 4,096 VCUs, even when not in use.'
-        );
+        // Adaptive resources switch should be hidden
+        // always use adaptive resources for serverless projects
+        await ml.trainedModelsTable.assertAdaptiveResourcesSwitchExists(false);
      });
    });
  });