[ML] Trained Models: Track model downloads and Deployment updates (#213699)

Part of https://github.com/elastic/kibana/issues/200725

This PR adds telemetry to track model downloads and deployment updates. It also tracks failed deployments; the previous implementation tracked only successful ones.
2025-04-24 01:38:56 -04:00 · 2025-03-13 19:29:08 +01:00 · 2025-03-13 19:29:08 +01:00 · 482ac1ee76
commit 482ac1ee76
parent 69723619b8
7 changed files with 177 additions and 51 deletions
--- a/x-pack/platform/plugins/shared/ml/public/application/model_management/trained_models_service.test.ts
+++ b/x-pack/platform/plugins/shared/ml/public/application/model_management/trained_models_service.test.ts
@ -323,7 +323,31 @@ describe('TrainedModelsService', () => {
  });

  it('updates model deployment successfully', async () => {
-    mockTrainedModelsApiService.updateModelDeployment.mockResolvedValueOnce({ acknowledge: true });
+    mockTrainedModelsApiService.updateModelDeployment.mockResolvedValueOnce({
+      assignment: {
+        assignment_state: 'started',
+        routing_table: {
+          'node-1': {
+            routing_state: 'started',
+            reason: '',
+            current_allocations: 1,
+            target_allocations: 1,
+          },
+        },
+        start_time: 1234567890,
+        task_parameters: {
+          model_id: 'test-model',
+          model_bytes: 1000,
+          priority: 'normal',
+          number_of_allocations: 1,
+          threads_per_allocation: 1,
+          queue_capacity: 1024,
+          deployment_id: 'my-deployment-id',
+          per_deployment_memory_bytes: '1mb',
+          per_allocation_memory_bytes: '1mb',
+        },
+      },
+    });

    trainedModelsService.updateModelDeployment('test-model', deploymentParamsUiMock);
    await flushPromises();
--- a/x-pack/platform/plugins/shared/ml/public/application/model_management/trained_models_service.ts
+++ b/x-pack/platform/plugins/shared/ml/public/application/model_management/trained_models_service.ts
@ -42,6 +42,7 @@ import {
 import type {
  UpdateAllocationParams,
  DeleteModelParams,
+  StartAllocationParams,
 } from '../services/ml_api_service/trained_models';
 import { type TrainedModelsApiService } from '../services/ml_api_service/trained_models';
 import type { SavedObjectsApiService } from '../services/ml_api_service/saved_objects';
@ -170,12 +171,10 @@ export class TrainedModelsService {
  }

  public downloadModel(modelId: string) {
-    this.downloadInProgress.add(modelId);
    this._isLoading$.next(true);
    from(this.trainedModelsApiService.installElasticTrainedModelConfig(modelId))
      .pipe(
        finalize(() => {
-          this.downloadInProgress.delete(modelId);
          this.fetchModels();
        })
      )
@ -188,44 +187,65 @@ export class TrainedModelsService {
              values: { modelId },
            })
          );
+          this.telemetryService.trackTrainedModelsModelDownload({
+            model_id: modelId,
+            result: 'failure',
+          });
        },
      });
  }

  public updateModelDeployment(modelId: string, config: DeploymentParamsUI) {
+    const apiParams = this.deploymentParamsMapper.mapUiToApiDeploymentParams(modelId, config);
+
    from(
      this.trainedModelsApiService.updateModelDeployment(
        modelId,
        config.deploymentId!,
-        this.getUpdateModelAllocationParams(modelId, config)
+        this.getUpdateModelAllocationParams(apiParams)
      )
    )
      .pipe(
+        tap({
+          next: () => {
+            this.displaySuccessToast?.({
+              title: i18n.translate('xpack.ml.trainedModels.modelsList.updateSuccess', {
+                defaultMessage: 'Deployment updated',
+              }),
+              text: i18n.translate('xpack.ml.trainedModels.modelsList.updateSuccessText', {
+                defaultMessage: '"{deploymentId}" has been updated successfully.',
+                values: { deploymentId: config.deploymentId },
+              }),
+            });
+          },
+          error: (error) => {
+            this.displayErrorToast?.(
+              error,
+              i18n.translate('xpack.ml.trainedModels.modelsList.updateFailed', {
+                defaultMessage: 'Failed to update "{deploymentId}"',
+                values: { deploymentId: config.deploymentId },
+              })
+            );
+          },
+        }),
+        map(() => ({ success: true })),
+        catchError(() => of({ success: false })),
        finalize(() => {
          this.fetchModels();
        })
      )
-      .subscribe({
-        next: () => {
-          this.displaySuccessToast?.({
-            title: i18n.translate('xpack.ml.trainedModels.modelsList.updateSuccess', {
-              defaultMessage: 'Deployment updated',
-            }),
-            text: i18n.translate('xpack.ml.trainedModels.modelsList.updateSuccessText', {
-              defaultMessage: '"{deploymentId}" has been updated successfully.',
-              values: { deploymentId: config.deploymentId },
-            }),
-          });
-        },
-        error: (error) => {
-          this.displayErrorToast?.(
-            error,
-            i18n.translate('xpack.ml.trainedModels.modelsList.updateFailed', {
-              defaultMessage: 'Failed to update "{deploymentId}"',
-              values: { deploymentId: config.deploymentId },
-            })
-          );
-        },
+      .subscribe((result) => {
+        this.telemetryService.trackTrainedModelsDeploymentUpdated({
+          adaptive_resources: config.adaptiveResources,
+          model_id: modelId,
+          optimized: config.optimized,
+          vcpu_usage: config.vCPUUsage,
+          max_number_of_allocations: apiParams.adaptiveAllocationsParams?.max_number_of_allocations,
+          min_number_of_allocations: apiParams.adaptiveAllocationsParams?.min_number_of_allocations,
+          number_of_allocations: apiParams.deploymentParams.number_of_allocations,
+          threads_per_allocation: apiParams.deploymentParams.threads_per_allocation,
+          result: result.success ? 'success' : 'failure',
+        });
      });
  }

@ -486,20 +506,7 @@ export class TrainedModelsService {
        return firstValueFrom(
          this.trainedModelsApiService.startModelAllocation(apiParams).pipe(
            tap({
-              next: ({ assignment }) => {
-                this.telemetryService.trackTrainedModelsDeploymentCreated({
-                  model_id: apiParams.modelId,
-                  optimized: deployment.optimized,
-                  adaptive_resources: deployment.adaptiveResources,
-                  vcpu_usage: deployment.vCPUUsage,
-                  number_of_allocations: apiParams.deploymentParams.number_of_allocations,
-                  threads_per_allocation: assignment.task_parameters.threads_per_allocation,
-                  min_number_of_allocations:
-                    assignment.adaptive_allocations?.min_number_of_allocations,
-                  max_number_of_allocations:
-                    assignment.adaptive_allocations?.max_number_of_allocations,
-                });
-
+              next: () => {
                this.displaySuccessToast?.({
                  title: i18n.translate('xpack.ml.trainedModels.modelsList.startSuccess', {
                    defaultMessage: 'Deployment started',
@ -513,6 +520,7 @@ export class TrainedModelsService {
                });
              },
            }),
+            map(() => ({ success: true })),
            catchError((error) => {
              this.displayErrorToast?.(
                error,
@ -523,8 +531,24 @@ export class TrainedModelsService {
                  },
                })
              );
-              // Return null to allow stream to continue
-              return of(null);
+
+              // Return observable to allow stream to continue
+              return of({ success: false });
+            }),
+            tap((result) => {
+              this.telemetryService.trackTrainedModelsDeploymentCreated({
+                model_id: apiParams.modelId,
+                optimized: deployment.optimized,
+                adaptive_resources: deployment.adaptiveResources,
+                vcpu_usage: deployment.vCPUUsage,
+                number_of_allocations: apiParams.deploymentParams.number_of_allocations,
+                threads_per_allocation: apiParams.deploymentParams.threads_per_allocation,
+                min_number_of_allocations:
+                  apiParams.adaptiveAllocationsParams?.min_number_of_allocations,
+                max_number_of_allocations:
+                  apiParams.adaptiveAllocationsParams?.max_number_of_allocations,
+                result: result.success ? 'success' : 'failure',
+              });
            }),
            finalize(() => {
              this.removeScheduledDeployments({
@ -544,12 +568,7 @@ export class TrainedModelsService {
    );
  }

-  private getUpdateModelAllocationParams(
-    modelId: string,
-    uiParams: DeploymentParamsUI
-  ): UpdateAllocationParams {
-    const apiParams = this.deploymentParamsMapper.mapUiToApiDeploymentParams(modelId, uiParams);
-
+  private getUpdateModelAllocationParams(apiParams: StartAllocationParams): UpdateAllocationParams {
    return apiParams.adaptiveAllocationsParams
      ? {
          adaptive_allocations: apiParams.adaptiveAllocationsParams,
@ -624,9 +643,22 @@ export class TrainedModelsService {
                // Aborted
                this.abortedDownloads.delete(item.model_id);
                newItem.state = MODEL_STATE.NOT_DOWNLOADED;
+
+                this.telemetryService.trackTrainedModelsModelDownload({
+                  model_id: item.model_id,
+                  result: 'cancelled',
+                });
              } else if (downloadInProgress.has(item.model_id) || !item.state) {
                // Finished downloading
                newItem.state = MODEL_STATE.DOWNLOADED;
+
+                // Only track success if the model was downloading
+                if (downloadInProgress.has(item.model_id)) {
+                  this.telemetryService.trackTrainedModelsModelDownload({
+                    model_id: item.model_id,
+                    result: 'success',
+                  });
+                }
              }
              downloadInProgress.delete(item.model_id);
              return newItem;
@ -651,6 +683,13 @@ export class TrainedModelsService {
        error: (error) => {
          this.stopPolling();
          this.downloadStatusFetchInProgress = false;
+
+          downloadInProgress.forEach((modelId) => {
+            this.telemetryService.trackTrainedModelsModelDownload({
+              model_id: modelId,
+              result: 'failure',
+            });
+          });
        },
      });
  }
--- a/x-pack/platform/plugins/shared/ml/public/application/services/ml_api_service/trained_models.ts
+++ b/x-pack/platform/plugins/shared/ml/public/application/services/ml_api_service/trained_models.ts
@ -274,7 +274,7 @@ export function trainedModelsApiProvider(httpService: HttpService) {
    },

    updateModelDeployment(modelId: string, deploymentId: string, params: UpdateAllocationParams) {
-      return httpService.http<{ acknowledge: boolean }>({
+      return httpService.http<estypes.MlUpdateTrainedModelDeploymentResponse>({
        path: `${ML_INTERNAL_BASE_PATH}/trained_models/${modelId}/${deploymentId}/deployment/_update`,
        method: 'POST',
        body: JSON.stringify(params),
--- a/x-pack/platform/plugins/shared/ml/public/application/services/telemetry/events.ts
+++ b/x-pack/platform/plugins/shared/ml/public/application/services/telemetry/events.ts
@ -11,6 +11,7 @@ import {
  TrainedModelsTelemetryEventTypes,
  type TrainedModelsDeploymentEbtProps,
  type TrainedModelsTelemetryEvent,
+  type TrainedModelsModelDownloadEbtProps,
 } from './types';

 const trainedModelsDeploymentSchema: SchemaObject<TrainedModelsDeploymentEbtProps>['properties'] = {
@ -65,8 +66,30 @@ const trainedModelsDeploymentSchema: SchemaObject<TrainedModelsDeploymentEbtProp
      optional: true,
    },
  },
+  result: {
+    type: 'keyword',
+    _meta: {
+      description: 'The result of the deployment',
+    },
+  },
 };

+const trainedModelsModelDownloadSchema: SchemaObject<TrainedModelsModelDownloadEbtProps>['properties'] =
+  {
+    model_id: {
+      type: 'keyword',
+      _meta: {
+        description: 'The ID of the trained model',
+      },
+    },
+    result: {
+      type: 'keyword',
+      _meta: {
+        description: 'The result of the model download',
+      },
+    },
+  };
+
 const trainedModelsModelTestedSchema: SchemaObject<TrainedModelsModelTestedEbtProps>['properties'] =
  {
    model_id: {
@ -102,6 +125,16 @@ const trainedModelsDeploymentCreatedEventType: TrainedModelsTelemetryEvent = {
  schema: trainedModelsDeploymentSchema,
 };

+const trainedModelsModelDownloadEventType: TrainedModelsTelemetryEvent = {
+  eventType: TrainedModelsTelemetryEventTypes.MODEL_DOWNLOAD,
+  schema: trainedModelsModelDownloadSchema,
+};
+
+const trainedModelsDeploymentUpdatedEventType: TrainedModelsTelemetryEvent = {
+  eventType: TrainedModelsTelemetryEventTypes.DEPLOYMENT_UPDATED,
+  schema: trainedModelsDeploymentSchema,
+};
+
 const trainedModelsModelTestedEventType: TrainedModelsTelemetryEvent = {
  eventType: TrainedModelsTelemetryEventTypes.MODEL_TESTED,
  schema: trainedModelsModelTestedSchema,
@ -109,5 +142,7 @@ const trainedModelsModelTestedEventType: TrainedModelsTelemetryEvent = {

 export const trainedModelsEbtEvents = {
  trainedModelsDeploymentCreatedEventType,
+  trainedModelsModelDownloadEventType,
+  trainedModelsDeploymentUpdatedEventType,
  trainedModelsModelTestedEventType,
 };
--- a/x-pack/platform/plugins/shared/ml/public/application/services/telemetry/telemetry_client.ts
+++ b/x-pack/platform/plugins/shared/ml/public/application/services/telemetry/telemetry_client.ts
@ -9,6 +9,7 @@ import type { AnalyticsServiceSetup } from '@kbn/core-analytics-browser';
 import type {
  ITelemetryClient,
  TrainedModelsDeploymentEbtProps,
+  TrainedModelsModelDownloadEbtProps,
  TrainedModelsModelTestedEbtProps,
 } from './types';
 import { TrainedModelsTelemetryEventTypes } from './types';
@ -20,6 +21,13 @@ export class TelemetryClient implements ITelemetryClient {
    this.analytics.reportEvent(TrainedModelsTelemetryEventTypes.DEPLOYMENT_CREATED, eventProps);
  };

+  public trackTrainedModelsModelDownload = (eventProps: TrainedModelsModelDownloadEbtProps) => {
+    this.analytics.reportEvent(TrainedModelsTelemetryEventTypes.MODEL_DOWNLOAD, eventProps);
+  };
+
+  public trackTrainedModelsDeploymentUpdated = (eventProps: TrainedModelsDeploymentEbtProps) => {
+    this.analytics.reportEvent(TrainedModelsTelemetryEventTypes.DEPLOYMENT_UPDATED, eventProps);
+  };
  public trackTrainedModelsModelTested = (eventProps: TrainedModelsModelTestedEbtProps) => {
    this.analytics.reportEvent(TrainedModelsTelemetryEventTypes.MODEL_TESTED, eventProps);
  };
--- a/x-pack/platform/plugins/shared/ml/public/application/services/telemetry/telemetry_service.ts
+++ b/x-pack/platform/plugins/shared/ml/public/application/services/telemetry/telemetry_service.ts
@ -23,6 +23,8 @@ export class TelemetryService {
    this.analytics = analytics;

    analytics.registerEventType(trainedModelsEbtEvents.trainedModelsDeploymentCreatedEventType);
+    analytics.registerEventType(trainedModelsEbtEvents.trainedModelsModelDownloadEventType);
+    analytics.registerEventType(trainedModelsEbtEvents.trainedModelsDeploymentUpdatedEventType);
    analytics.registerEventType(trainedModelsEbtEvents.trainedModelsModelTestedEventType);
  }

--- a/x-pack/platform/plugins/shared/ml/public/application/services/telemetry/types.ts
+++ b/x-pack/platform/plugins/shared/ml/public/application/services/telemetry/types.ts
@ -10,13 +10,19 @@ import type { TrainedModelType } from '@kbn/ml-trained-models-utils';

 export interface TrainedModelsDeploymentEbtProps {
  model_id: string;
+  optimized: 'optimizedForIngest' | 'optimizedForSearch';
+  adaptive_resources: boolean;
+  vcpu_usage: 'low' | 'medium' | 'high';
+  result: 'success' | 'failure';
  max_number_of_allocations?: number;
  min_number_of_allocations?: number;
  threads_per_allocation: number;
  number_of_allocations?: number;
-  optimized: 'optimizedForIngest' | 'optimizedForSearch';
-  adaptive_resources: boolean;
-  vcpu_usage: 'low' | 'medium' | 'high';
+}
+
+export interface TrainedModelsModelDownloadEbtProps {
+  model_id: string;
+  result: 'success' | 'failure' | 'cancelled';
 }

 export interface TrainedModelsModelTestedEbtProps {
@ -29,6 +35,8 @@ export interface TrainedModelsModelTestedEbtProps {
 export enum TrainedModelsTelemetryEventTypes {
  DEPLOYMENT_CREATED = 'Trained Models Deployment Created',
  MODEL_TESTED = 'Trained Model Tested',
+  MODEL_DOWNLOAD = 'Trained Models Model Download',
+  DEPLOYMENT_UPDATED = 'Trained Models Deployment Updated',
 }

 export type TrainedModelsTelemetryEvent =
@ -39,9 +47,19 @@ export type TrainedModelsTelemetryEvent =
  | {
      eventType: TrainedModelsTelemetryEventTypes.MODEL_TESTED;
      schema: RootSchema<TrainedModelsModelTestedEbtProps>;
+    }
+  | {
+      eventType: TrainedModelsTelemetryEventTypes.MODEL_DOWNLOAD;
+      schema: RootSchema<TrainedModelsModelDownloadEbtProps>;
+    }
+  | {
+      eventType: TrainedModelsTelemetryEventTypes.DEPLOYMENT_UPDATED;
+      schema: RootSchema<TrainedModelsDeploymentEbtProps>;
    };

 export interface ITelemetryClient {
  trackTrainedModelsDeploymentCreated: (eventProps: TrainedModelsDeploymentEbtProps) => void;
+  trackTrainedModelsModelDownload: (eventProps: TrainedModelsModelDownloadEbtProps) => void;
+  trackTrainedModelsDeploymentUpdated: (eventProps: TrainedModelsDeploymentEbtProps) => void;
  trackTrainedModelsModelTested: (eventProps: TrainedModelsModelTestedEbtProps) => void;
 }