[ML] Trained Models: Hide Adaptive Allocations Toggle in Serverless (#224097)

Resolves https://github.com/elastic/kibana/issues/221894

Removes the adaptive allocations toggle in serverless projects, where adaptive allocations are always enabled.

Adjusts the `min_allocations` param per project type:
* Observability - min: 0
* Search - min: 0
* Security - min: 1
2025-06-28 03:01:21 -04:00 · 2025-06-18 15:52:15 +02:00 · 2025-06-18 15:52:15 +02:00 · f79b68d84e
commit f79b68d84e
parent c38269744b
6 changed files with 60 additions and 21 deletions
--- a/config/serverless.es.yml
+++ b/config/serverless.es.yml
@@ -85,18 +85,18 @@ xpack.ml.dfa.enabled: false
 xpack.ml.nlp:
  enabled: true
  modelDeployment:
-    allowStaticAllocations: true
+    allowStaticAllocations: false
    vCPURange:
      low:
        min: 0
        max: 2
        static: 2
      medium:
-        min: 1
+        min: 0
        max: 32
        static: 32
      high:
-        min: 1
+        min: 0
        max: 512
        static: 512
 xpack.ml.compatibleModuleType: 'search'
--- a/config/serverless.oblt.yml
+++ b/config/serverless.oblt.yml
@@ -119,10 +119,10 @@ xpack.ml.nlp:
        min: 0
        max: 2
      medium:
-        min: 1
+        min: 0
        max: 32
      high:
-        min: 1
+        min: 0
        max: 128
 xpack.ml.compatibleModuleType: 'observability'
--- a/x-pack/platform/plugins/shared/ml/public/application/model_management/deployment_params_mapper.test.ts
+++ b/x-pack/platform/plugins/shared/ml/public/application/model_management/deployment_params_mapper.test.ts
@@ -48,12 +48,12 @@ describe('DeploymentParamsMapper', () => {
          static: 16,
        });
        expect(mapper.getVCURange('medium')).toEqual({
-          min: 8,
+          min: 0,
          max: 256,
          static: 256,
        });
        expect(mapper.getVCURange('high')).toEqual({
-          min: 8,
+          min: 0,
          max: 4096,
          static: 4096,
        });
@@ -93,6 +93,48 @@ describe('DeploymentParamsMapper', () => {
            number_of_allocations: 2,
          },
        });
+        expect(
+          mapper.mapUiToApiDeploymentParams(modelId, {
+            deploymentId: 'test-deployment',
+            optimized: 'optimizedForSearch',
+            adaptiveResources: true,
+            vCPUUsage: 'medium',
+          })
+        ).toEqual({
+          modelId: 'test-model',
+          deploymentParams: {
+            deployment_id: 'test-deployment',
+            priority: 'normal',
+            threads_per_allocation: 16,
+          },
+          adaptiveAllocationsParams: {
+            enabled: true,
+            max_number_of_allocations: 2,
+            min_number_of_allocations: 0,
+          },
+        });
+        expect(
+          mapper.mapUiToApiDeploymentParams(modelId, {
+            deploymentId: 'test-deployment',
+            optimized: 'optimizedForIngest',
+            adaptiveResources: true,
+            vCPUUsage: 'high',
+          })
+        ).toEqual({
+          modelId: 'test-model',
+          deploymentParams: {
+            deployment_id: 'test-deployment',
+            priority: 'normal',
+            threads_per_allocation: 1,
+          },
+          adaptiveAllocationsParams: {
+            enabled: true,
+            max_number_of_allocations: 512,
+            min_number_of_allocations: 0,
+          },
+        });
      });
      it('overrides vCPUs levels and enforces adaptive allocations if static support is not configured', () => {
--- a/x-pack/platform/plugins/shared/ml/public/application/model_management/deployment_params_mapper.ts
+++ b/x-pack/platform/plugins/shared/ml/public/application/model_management/deployment_params_mapper.ts
@@ -56,8 +56,8 @@ export class DeploymentParamsMapper {
   */
  private readonly serverlessVCPUBreakpoints: VCPUBreakpoints = {
    low: { min: this.minAllowedNumberOfAllocation, max: 2, static: 2 },
-    medium: { min: 1, max: 32, static: 32 },
+    medium: { min: 0, max: 32, static: 32 },
-    high: { min: 1, max: 512, static: 512 },
+    high: { min: 0, max: 512, static: 512 },
  };
  /**
@@ -152,8 +152,11 @@ export class DeploymentParamsMapper {
      number_of_allocations: maxValue,
      min_number_of_allocations:
        Math.floor(levelValues.min / threadsPerAllocation) ||
-        // in any env, allow scale down to 0 only for "low" vCPU usage
+        // For serverless env, always allow scale down to 0
-        (params.vCPUUsage === 'low' ? this.minAllowedNumberOfAllocation : 1),
+        // For other envs, allow scale down to 0 only for "low" vCPU usage
+        (this.showNodeInfo === false || params.vCPUUsage === 'low'
+          ? this.minAllowedNumberOfAllocation
+          : 1),
      max_number_of_allocations: maxValue,
    };
  }
--- a/x-pack/platform/plugins/shared/ml/public/application/model_management/deployment_setup.tsx
+++ b/x-pack/platform/plugins/shared/ml/public/application/model_management/deployment_setup.tsx
@@ -818,7 +818,7 @@ export const StartUpdateDeploymentModal: FC<StartDeploymentModalProps> = ({
          errors={errors}
          isUpdate={isUpdate}
          disableAdaptiveResourcesControl={
-            showNodeInfo ? false : !nlpSettings.modelDeployment.allowStaticAllocations
+            !showNodeInfo || !nlpSettings.modelDeployment.allowStaticAllocations
          }
          deploymentsParams={
            isModelNotDownloaded || !isNLPModelItem(model)
--- a/x-pack/test_serverless/functional/test_suites/search/ml/trained_models_list.ts
+++ b/x-pack/test_serverless/functional/test_suites/search/ml/trained_models_list.ts
@@ -60,15 +60,9 @@ export default function ({ getService, getPageObjects, getPageObject }: FtrProvi
          'Your model will scale up to a maximum of 4,096 VCUs per hour based on your search or ingest load. It will automatically scale down when demand decreases, and you only pay for the resources you use.'
        );
-        // Adaptive resources switch should be checked by default
+        // Adaptive resources switch should be hidden
-        await ml.trainedModelsTable.assertAdaptiveResourcesSwitchChecked(true);
+        // always use adaptive resources for serverless projects
-
+        await ml.trainedModelsTable.assertAdaptiveResourcesSwitchExists(false);
-        // Static allocations should be allowed for search projects
-        await ml.trainedModelsTable.toggleAdaptiveResourcesSwitch(false);
-        await ml.trainedModelsTable.assertVCPUHelperText(
-          'Your model will consume 4,096 VCUs, even when not in use.'
-        );
      });
    });
  });