add pipelineReferences to ml processors results (#143701)

Added a list of pipelines that reference each ML inference processor so
that we can determine whether a processor is re-used on the pipelines
page. This allows us to disable the delete action when a processor is
referenced by more than one pipeline.
Rodney Norris 2022-10-20 10:36:32 -05:00 committed by GitHub
parent e01b6bd5b6
commit d08e119c71
5 changed files with 145 additions and 46 deletions
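For context (not part of the diff below): a minimal sketch of how a pipelines-page component might consume the new pipelineReferences field to decide whether the delete action should be disabled. The InferencePipeline shape is paraphrased from the type change in this commit; isDeleteDisabled and the example object are hypothetical and not code from the Kibana repo.

// Minimal sketch, not part of this commit. `InferencePipeline` is paraphrased
// from the type change below; `isDeleteDisabled` and `example` are hypothetical.
interface InferencePipeline {
  modelId?: string;
  modelState: string; // the real type uses the TrainedModelState enum
  modelStateReason?: string;
  pipelineName: string;
  pipelineReferences: string[];
  types: string[];
}

// Disable delete when the processor pipeline is referenced by more than one
// parent @ml-inference pipeline, i.e. deleting it would affect other indices.
const isDeleteDisabled = (pipeline: InferencePipeline): boolean =>
  pipeline.pipelineReferences.length > 1;

// Example: a processor referenced from two index-specific parent pipelines.
const example: InferencePipeline = {
  modelId: 'trained-model-id-1',
  modelState: 'started',
  pipelineName: 'ml-inference-pipeline-1',
  pipelineReferences: ['my-index@ml-inference', 'other-index@ml-inference'],
  types: ['pytorch', 'ner'],
};
console.log(isDeleteDisabled(example)); // true

Checking pipelineReferences.length on the client avoids an extra API round trip, since the server now returns the referencing parent pipelines alongside each processor.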


@ -12,6 +12,7 @@ export interface InferencePipeline {
modelState: TrainedModelState;
modelStateReason?: string;
pipelineName: string;
pipelineReferences: string[];
types: string[];
}


@ -22,6 +22,7 @@ export const DEFAULT_VALUES: InferencePipeline = {
modelId: 'sample-bert-ner-model',
modelState: TrainedModelState.Started,
pipelineName: 'Sample Processor',
pipelineReferences: [],
types: ['pytorch', 'ner'],
};


@ -27,6 +27,7 @@ describe('TrainedModelHealth', () => {
modelId: 'sample-bert-ner-model',
modelState: TrainedModelState.NotDeployed,
pipelineName: 'Sample Processor',
pipelineReferences: [],
types: ['pytorch'],
};
it('renders model started', () => {


@ -5,6 +5,7 @@
* 2.0.
*/
import { errors } from '@elastic/elasticsearch';
import { ElasticsearchClient } from '@kbn/core/server';
import { MlTrainedModels } from '@kbn/ml-plugin/server';
@ -13,7 +14,8 @@ import { InferencePipeline, TrainedModelState } from '../../../common/types/pipe
import {
fetchAndAddTrainedModelData,
getMlModelConfigsForModelIds,
fetchMlInferencePipelineProcessorNames,
getMlInferencePipelineProcessorNamesFromPipelines,
fetchMlInferencePipelines,
fetchMlInferencePipelineProcessors,
fetchPipelineProcessorInferenceData,
InferencePipelineData,
@ -247,23 +249,35 @@ const trainedModelDataObject: Record<string, InferencePipeline> = {
modelId: 'trained-model-id-1',
modelState: TrainedModelState.NotDeployed,
pipelineName: 'ml-inference-pipeline-1',
pipelineReferences: ['my-index@ml-inference'],
types: ['lang_ident', 'ner'],
},
'trained-model-id-2': {
modelId: 'trained-model-id-2',
modelState: TrainedModelState.Started,
pipelineName: 'ml-inference-pipeline-2',
pipelineReferences: ['my-index@ml-inference'],
types: ['pytorch', 'ner'],
},
'ml-inference-pipeline-3': {
modelId: 'trained-model-id-1',
modelState: TrainedModelState.NotDeployed,
pipelineName: 'ml-inference-pipeline-3',
pipelineReferences: ['my-index@ml-inference'],
types: ['lang_ident', 'ner'],
},
};
describe('fetchMlInferencePipelineProcessorNames lib function', () => {
const notFoundResponse = { meta: { statusCode: 404 } };
const notFoundError = new errors.ResponseError({
body: notFoundResponse,
statusCode: 404,
headers: {},
meta: {} as any,
warnings: [],
});
describe('fetchMlInferencePipelines lib function', () => {
const mockClient = {
ingest: {
getPipeline: jest.fn(),
@ -274,32 +288,58 @@ describe('fetchMlInferencePipelineProcessorNames lib function', () => {
jest.clearAllMocks();
});
it('should return pipeline processor names for the @ml-inference pipeline', async () => {
it('should return @ml-inference pipelines', async () => {
mockClient.ingest.getPipeline.mockImplementation(() => Promise.resolve(mockGetPipeline));
const expected = ['ml-inference-pipeline-1'];
const response = await fetchMlInferencePipelines(mockClient as unknown as ElasticsearchClient);
const response = await fetchMlInferencePipelineProcessorNames(
mockClient as unknown as ElasticsearchClient,
'my-index'
);
expect(mockClient.ingest.getPipeline).toHaveBeenCalledWith({ id: 'my-index@ml-inference' });
expect(response).toEqual(expected);
expect(mockClient.ingest.getPipeline).toHaveBeenCalledWith({ id: '*@ml-inference' });
expect(response).toEqual(mockGetPipeline);
});
it('should return an empty array for a missing @ml-inference pipeline', async () => {
mockClient.ingest.getPipeline.mockImplementation(() => Promise.resolve(mockGetPipeline));
it('should return an empty object when no @ml-inference pipelines found', async () => {
mockClient.ingest.getPipeline.mockImplementation(() => Promise.resolve({}));
const response = await fetchMlInferencePipelineProcessorNames(
mockClient as unknown as ElasticsearchClient,
'my-index-without-ml-inference-pipeline'
const response = await fetchMlInferencePipelines(mockClient as unknown as ElasticsearchClient);
expect(response).toEqual({});
});
it('should return an empty object when getPipeline throws an error ', async () => {
mockClient.ingest.getPipeline.mockImplementation(() => Promise.reject(notFoundError));
const response = await fetchMlInferencePipelines(mockClient as unknown as ElasticsearchClient);
expect(response).toEqual({});
});
});
describe('getMlInferencePipelineProcessorNamesFromPipelines', () => {
it('should return pipeline processor names for the @ml-inference pipeline', () => {
const expected = ['ml-inference-pipeline-1'];
const processorNames = getMlInferencePipelineProcessorNamesFromPipelines(
'my-index',
mockGetPipeline
);
expect(processorNames).toEqual(expected);
});
it('should return an empty array for a missing @ml-inference pipeline', () => {
const processorNames = getMlInferencePipelineProcessorNamesFromPipelines(
'my-index-without-ml-inference-pipeline',
mockGetPipeline
);
expect(mockClient.ingest.getPipeline).toHaveBeenCalledWith({
id: 'my-index-without-ml-inference-pipeline@ml-inference',
});
expect(response).toEqual([]);
expect(processorNames).toEqual([]);
});
it('should return an empty array for a pipeline missing processors', () => {
const processorNames = getMlInferencePipelineProcessorNamesFromPipelines(
'my-index-without-ml-inference-pipeline',
{
'my-index-without-ml-inference-pipeline': {},
}
);
expect(processorNames).toEqual([]);
});
});
@ -322,6 +362,7 @@ describe('fetchPipelineProcessorInferenceData lib function', () => {
modelId: 'trained-model-id-1',
modelState: TrainedModelState.NotDeployed,
pipelineName: 'ml-inference-pipeline-1',
pipelineReferences: ['my-index@ml-inference', 'other-index@ml-inference'],
trainedModelName: 'trained-model-id-1',
types: [],
},
@ -329,6 +370,7 @@ describe('fetchPipelineProcessorInferenceData lib function', () => {
modelId: 'trained-model-id-2',
modelState: TrainedModelState.NotDeployed,
pipelineName: 'ml-inference-pipeline-2',
pipelineReferences: ['my-index@ml-inference'],
trainedModelName: 'trained-model-id-2',
types: [],
},
@ -336,7 +378,11 @@ describe('fetchPipelineProcessorInferenceData lib function', () => {
const response = await fetchPipelineProcessorInferenceData(
mockClient as unknown as ElasticsearchClient,
['ml-inference-pipeline-1', 'ml-inference-pipeline-2', 'non-ml-inference-pipeline']
['ml-inference-pipeline-1', 'ml-inference-pipeline-2', 'non-ml-inference-pipeline'],
{
'ml-inference-pipeline-1': ['my-index@ml-inference', 'other-index@ml-inference'],
'ml-inference-pipeline-2': ['my-index@ml-inference'],
}
);
expect(mockClient.ingest.getPipeline).toHaveBeenCalledWith({
@ -377,6 +423,7 @@ describe('getMlModelConfigsForModelIds lib function', () => {
modelId: 'trained-model-id-1',
modelState: TrainedModelState.Started,
pipelineName: '',
pipelineReferences: [],
trainedModelName: 'trained-model-id-1',
types: ['pytorch', 'ner'],
},
@ -384,6 +431,7 @@ describe('getMlModelConfigsForModelIds lib function', () => {
modelId: 'trained-model-id-2',
modelState: TrainedModelState.Started,
pipelineName: '',
pipelineReferences: [],
trainedModelName: 'trained-model-id-2',
types: ['pytorch', 'ner'],
},
@ -413,6 +461,7 @@ describe('getMlModelConfigsForModelIds lib function', () => {
modelId: 'trained-model-id-1',
modelState: TrainedModelState.Started,
pipelineName: '',
pipelineReferences: [],
trainedModelName: 'trained-model-id-1',
types: ['pytorch', 'ner'],
},
@ -420,6 +469,7 @@ describe('getMlModelConfigsForModelIds lib function', () => {
modelId: 'trained-model-id-2',
modelState: TrainedModelState.Started,
pipelineName: '',
pipelineReferences: [],
trainedModelName: 'trained-model-id-2',
types: ['pytorch', 'ner'],
},
@ -427,6 +477,7 @@ describe('getMlModelConfigsForModelIds lib function', () => {
modelId: undefined, // Redacted
modelState: TrainedModelState.Started,
pipelineName: '',
pipelineReferences: [],
trainedModelName: 'trained-model-id-3-in-other-space',
types: ['pytorch', 'ner'],
},
@ -483,6 +534,7 @@ describe('fetchAndAddTrainedModelData lib function', () => {
modelId: 'trained-model-id-1',
modelState: TrainedModelState.NotDeployed,
pipelineName: 'ml-inference-pipeline-1',
pipelineReferences: [],
trainedModelName: 'trained-model-id-1',
types: [],
},
@ -490,6 +542,7 @@ describe('fetchAndAddTrainedModelData lib function', () => {
modelId: 'trained-model-id-2',
modelState: TrainedModelState.NotDeployed,
pipelineName: 'ml-inference-pipeline-2',
pipelineReferences: [],
trainedModelName: 'trained-model-id-2',
types: [],
},
@ -497,6 +550,7 @@ describe('fetchAndAddTrainedModelData lib function', () => {
modelId: 'trained-model-id-3',
modelState: TrainedModelState.NotDeployed,
pipelineName: 'ml-inference-pipeline-3',
pipelineReferences: [],
trainedModelName: 'trained-model-id-3',
types: [],
},
@ -504,6 +558,7 @@ describe('fetchAndAddTrainedModelData lib function', () => {
modelId: 'trained-model-id-4',
modelState: TrainedModelState.NotDeployed,
pipelineName: 'ml-inference-pipeline-4',
pipelineReferences: [],
trainedModelName: 'trained-model-id-4',
types: [],
},
@ -514,6 +569,7 @@ describe('fetchAndAddTrainedModelData lib function', () => {
modelId: 'trained-model-id-1',
modelState: TrainedModelState.NotDeployed,
pipelineName: 'ml-inference-pipeline-1',
pipelineReferences: [],
trainedModelName: 'trained-model-id-1',
types: ['lang_ident', 'ner'],
},
@ -521,6 +577,7 @@ describe('fetchAndAddTrainedModelData lib function', () => {
modelId: 'trained-model-id-2',
modelState: TrainedModelState.Started,
pipelineName: 'ml-inference-pipeline-2',
pipelineReferences: [],
trainedModelName: 'trained-model-id-2',
types: ['pytorch', 'ner'],
},
@ -529,6 +586,7 @@ describe('fetchAndAddTrainedModelData lib function', () => {
modelState: TrainedModelState.Failed,
modelStateReason: 'something is wrong, boom',
pipelineName: 'ml-inference-pipeline-3',
pipelineReferences: [],
trainedModelName: 'trained-model-id-3',
types: ['pytorch', 'text_classification'],
},
@ -536,6 +594,7 @@ describe('fetchAndAddTrainedModelData lib function', () => {
modelId: 'trained-model-id-4',
modelState: TrainedModelState.Starting,
pipelineName: 'ml-inference-pipeline-4',
pipelineReferences: [],
trainedModelName: 'trained-model-id-4',
types: ['pytorch', 'fill_mask'],
},
@ -599,7 +658,7 @@ describe('fetchMlInferencePipelineProcessors lib function', () => {
);
expect(mockClient.ingest.getPipeline).toHaveBeenCalledWith({
id: 'index-with-no-ml-inference-pipeline@ml-inference',
id: '*@ml-inference',
});
expect(mockClient.ingest.getPipeline).toHaveBeenCalledTimes(1);
expect(mockClient.ml.getTrainedModels).toHaveBeenCalledTimes(0);
@ -626,7 +685,7 @@ describe('fetchMlInferencePipelineProcessors lib function', () => {
);
expect(mockClient.ingest.getPipeline).toHaveBeenCalledWith({
id: 'my-index@ml-inference',
id: '*@ml-inference',
});
expect(mockClient.ingest.getPipeline).toHaveBeenCalledWith({
id: 'ml-inference-pipeline-1',
@ -663,7 +722,7 @@ describe('fetchMlInferencePipelineProcessors lib function', () => {
);
expect(mockClient.ingest.getPipeline).toHaveBeenCalledWith({
id: 'my-index@ml-inference',
id: '*@ml-inference',
});
expect(mockClient.ingest.getPipeline).toHaveBeenCalledWith({
id: 'ml-inference-pipeline-1',
@ -707,7 +766,7 @@ describe('fetchMlInferencePipelineProcessors lib function', () => {
);
expect(mockClient.ingest.getPipeline).toHaveBeenCalledWith({
id: 'my-index@ml-inference',
id: '*@ml-inference',
});
expect(mockClient.ingest.getPipeline).toHaveBeenCalledWith({
id: 'ml-inference-pipeline-1,ml-inference-pipeline-3',


@ -5,6 +5,7 @@
* 2.0.
*/
import { IngestGetPipelineResponse } from '@elastic/elasticsearch/lib/api/types';
import { ElasticsearchClient } from '@kbn/core/server';
import { MlTrainedModels } from '@kbn/ml-plugin/server';
@ -16,31 +17,62 @@ export type InferencePipelineData = InferencePipeline & {
trainedModelName: string;
};
export const fetchMlInferencePipelineProcessorNames = async (
client: ElasticsearchClient,
indexName: string
): Promise<string[]> => {
export const fetchMlInferencePipelines = async (client: ElasticsearchClient) => {
try {
const mlInferencePipelineName = getInferencePipelineNameFromIndexName(indexName);
const {
[mlInferencePipelineName]: { processors: mlInferencePipelineProcessors = [] },
} = await client.ingest.getPipeline({
id: mlInferencePipelineName,
return await client.ingest.getPipeline({
id: getInferencePipelineNameFromIndexName('*'),
});
} catch (error) {
// The GET /_ingest/pipeline API returns an empty object on 404 Not Found. If there are no `@ml-inference`
// pipelines then return an empty record of pipelines
return {};
}
};
return mlInferencePipelineProcessors
.map((obj) => obj.pipeline?.name)
.filter((name): name is string => name !== undefined);
} catch (err) {
// The GET /_ingest/pipeline API returns an empty object on 404 Not Found. If someone provides
// a bad index name, catch the error and return an empty array of names.
export const getMlInferencePipelineProcessorNamesFromPipelines = (
indexName: string,
pipelines: IngestGetPipelineResponse
): string[] => {
const mlInferencePipelineName = getInferencePipelineNameFromIndexName(indexName);
if (pipelines?.[mlInferencePipelineName]?.processors === undefined) {
return [];
}
const {
[mlInferencePipelineName]: { processors: mlInferencePipelineProcessors = [] },
} = pipelines;
return mlInferencePipelineProcessors
.map((obj) => obj.pipeline?.name)
.filter((name): name is string => name !== undefined);
};
export const getProcessorPipelineMap = (
pipelines: IngestGetPipelineResponse
): Record<string, string[]> => {
const result: Record<string, string[]> = {};
const addPipelineToProcessorMap = (processorName: string, pipelineName: string) => {
if (processorName in result) {
result[processorName].push(pipelineName);
} else {
result[processorName] = [pipelineName];
}
};
Object.entries(pipelines).forEach(([name, pipeline]) =>
pipeline?.processors?.forEach((processor) => {
if (processor.pipeline?.name !== undefined) {
addPipelineToProcessorMap(processor.pipeline.name, name);
}
})
);
return result;
};
export const fetchPipelineProcessorInferenceData = async (
client: ElasticsearchClient,
mlInferencePipelineProcessorNames: string[]
mlInferencePipelineProcessorNames: string[],
pipelineProcessorsMap: Record<string, string[]>
): Promise<InferencePipelineData[]> => {
const mlInferencePipelineProcessorConfigs = await client.ingest.getPipeline({
id: mlInferencePipelineProcessorNames.join(),
@ -61,6 +93,7 @@ export const fetchPipelineProcessorInferenceData = async (
modelId: trainedModelName,
modelState: TrainedModelState.NotDeployed,
pipelineName: pipelineProcessorName,
pipelineReferences: pipelineProcessorsMap?.[pipelineProcessorName] ?? [],
trainedModelName,
types: [],
});
@ -96,6 +129,7 @@ export const getMlModelConfigsForModelIds = async (
modelId: modelNamesInCurrentSpace.includes(trainedModelName) ? trainedModelName : undefined,
modelState: TrainedModelState.NotDeployed,
pipelineName: '',
pipelineReferences: [],
trainedModelName,
types: getMlModelTypesForModelConfig(trainedModelData),
};
@ -155,9 +189,9 @@ export const fetchAndAddTrainedModelData = async (
return {
...data,
modelId,
types,
modelState,
modelStateReason,
types,
};
});
};
@ -171,9 +205,11 @@ export const fetchMlInferencePipelineProcessors = async (
return Promise.reject(new Error('Machine Learning is not enabled'));
}
const mlInferencePipelineProcessorNames = await fetchMlInferencePipelineProcessorNames(
client,
indexName
const allMlPipelines = await fetchMlInferencePipelines(client);
const pipelineProcessorsPipelineCountMap = getProcessorPipelineMap(allMlPipelines);
const mlInferencePipelineProcessorNames = getMlInferencePipelineProcessorNamesFromPipelines(
indexName,
allMlPipelines
);
// Elasticsearch's GET pipelines API call will return all of the pipeline data if no ids are
@ -183,7 +219,8 @@ export const fetchMlInferencePipelineProcessors = async (
const pipelineProcessorInferenceData = await fetchPipelineProcessorInferenceData(
client,
mlInferencePipelineProcessorNames
mlInferencePipelineProcessorNames,
pipelineProcessorsPipelineCountMap
);
// Elasticsearch's GET trained models and GET trained model stats API calls will return the