add pipelineReferences to ml processors results (#143701)

Added a list of pipelines that reference each ML inference processor so
that we can determine whether a processor is re-used on the pipelines
page. This allows us to disable the delete action when a processor is
referenced by more than one pipeline.
Rodney Norris 2022-10-20 10:36:32 -05:00 committed by GitHub
parent e01b6bd5b6
commit d08e119c71
5 changed files with 145 additions and 46 deletions
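For context (not part of the diff below): a minimal sketch of how a pipelines-page component might consume the new pipelineReferences field to decide whether the delete action should be disabled. The InferencePipeline shape is paraphrased from the type change in this commit; isDeleteDisabled and the example object are hypothetical and not code from the Kibana repo.

// Minimal sketch, not part of this commit. `InferencePipeline` is paraphrased
// from the type change below; `isDeleteDisabled` and `example` are hypothetical.
interface InferencePipeline {
  modelId?: string;
  modelState: string; // the real type uses the TrainedModelState enum
  modelStateReason?: string;
  pipelineName: string;
  pipelineReferences: string[];
  types: string[];
}

// Disable delete when the processor pipeline is referenced by more than one
// parent @ml-inference pipeline, i.e. deleting it would affect other indices.
const isDeleteDisabled = (pipeline: InferencePipeline): boolean =>
  pipeline.pipelineReferences.length > 1;

// Example: a processor referenced from two index-specific parent pipelines.
const example: InferencePipeline = {
  modelId: 'trained-model-id-1',
  modelState: 'started',
  pipelineName: 'ml-inference-pipeline-1',
  pipelineReferences: ['my-index@ml-inference', 'other-index@ml-inference'],
  types: ['pytorch', 'ner'],
};
console.log(isDeleteDisabled(example)); // true

Checking pipelineReferences.length on the client avoids an extra API round trip, since the server now returns the referencing parent pipelines alongside each processor.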


@ -12,6 +12,7 @@ export interface InferencePipeline {
modelState: TrainedModelState;
modelStateReason?: string;
pipelineName: string;
pipelineReferences: string[];
types: string[];
}


@ -22,6 +22,7 @@ export const DEFAULT_VALUES: InferencePipeline = {
modelId: 'sample-bert-ner-model',
modelState: TrainedModelState.Started,
pipelineName: 'Sample Processor',
pipelineReferences: [],
types: ['pytorch', 'ner'],
};


@ -27,6 +27,7 @@ describe('TrainedModelHealth', () => {
modelId: 'sample-bert-ner-model',
modelState: TrainedModelState.NotDeployed,
pipelineName: 'Sample Processor',
pipelineReferences: [],
types: ['pytorch'],
};
it('renders model started', () => {


@ -5,6 +5,7 @@
* 2.0.
*/
import { errors } from '@elastic/elasticsearch';
import { ElasticsearchClient } from '@kbn/core/server';
import { MlTrainedModels } from '@kbn/ml-plugin/server';
@ -13,7 +14,8 @@ import { InferencePipeline, TrainedModelState } from '../../../common/types/pipe
import {
fetchAndAddTrainedModelData,
getMlModelConfigsForModelIds,
fetchMlInferencePipelineProcessorNames,
getMlInferencePipelineProcessorNamesFromPipelines,
fetchMlInferencePipelines,
fetchMlInferencePipelineProcessors,
fetchPipelineProcessorInferenceData,
InferencePipelineData,
@ -247,23 +249,35 @@ const trainedModelDataObject: Record<string, InferencePipeline> = {
modelId: 'trained-model-id-1',
modelState: TrainedModelState.NotDeployed,
pipelineName: 'ml-inference-pipeline-1',
pipelineReferences: ['my-index@ml-inference'],
types: ['lang_ident', 'ner'],
},
'trained-model-id-2': {
modelId: 'trained-model-id-2',
modelState: TrainedModelState.Started,
pipelineName: 'ml-inference-pipeline-2',
pipelineReferences: ['my-index@ml-inference'],
types: ['pytorch', 'ner'],
},
'ml-inference-pipeline-3': {
modelId: 'trained-model-id-1',
modelState: TrainedModelState.NotDeployed,
pipelineName: 'ml-inference-pipeline-3',
pipelineReferences: ['my-index@ml-inference'],
types: ['lang_ident', 'ner'],
},
};
describe('fetchMlInferencePipelineProcessorNames lib function', () => {
const notFoundResponse = { meta: { statusCode: 404 } };
const notFoundError = new errors.ResponseError({
body: notFoundResponse,
statusCode: 404,
headers: {},
meta: {} as any,
warnings: [],
});
describe('fetchMlInferencePipelines lib function', () => {
const mockClient = {
ingest: {
getPipeline: jest.fn(),
@ -274,32 +288,58 @@ describe('fetchMlInferencePipelineProcessorNames lib function', () => {
jest.clearAllMocks();
});
it('should return pipeline processor names for the @ml-inference pipeline', async () => {
it('should return @ml-inference pipelines', async () => {
mockClient.ingest.getPipeline.mockImplementation(() => Promise.resolve(mockGetPipeline));
const expected = ['ml-inference-pipeline-1'];
const response = await fetchMlInferencePipelines(mockClient as unknown as ElasticsearchClient);
const response = await fetchMlInferencePipelineProcessorNames(
mockClient as unknown as ElasticsearchClient,
'my-index'
);
expect(mockClient.ingest.getPipeline).toHaveBeenCalledWith({ id: 'my-index@ml-inference' });
expect(response).toEqual(expected);
expect(mockClient.ingest.getPipeline).toHaveBeenCalledWith({ id: '*@ml-inference' });
expect(response).toEqual(mockGetPipeline);
});
it('should return an empty array for a missing @ml-inference pipeline', async () => {
mockClient.ingest.getPipeline.mockImplementation(() => Promise.resolve(mockGetPipeline));
it('should return an empty object when no @ml-inference pipelines found', async () => {
mockClient.ingest.getPipeline.mockImplementation(() => Promise.resolve({}));
const response = await fetchMlInferencePipelineProcessorNames(
mockClient as unknown as ElasticsearchClient,
'my-index-without-ml-inference-pipeline'
const response = await fetchMlInferencePipelines(mockClient as unknown as ElasticsearchClient);
expect(response).toEqual({});
});
it('should return an empty object when getPipeline throws an error ', async () => {
mockClient.ingest.getPipeline.mockImplementation(() => Promise.reject(notFoundError));
const response = await fetchMlInferencePipelines(mockClient as unknown as ElasticsearchClient);
expect(response).toEqual({});
});
});
describe('getMlInferencePipelineProcessorNamesFromPipelines', () => {
it('should return pipeline processor names for the @ml-inference pipeline', () => {
const expected = ['ml-inference-pipeline-1'];
const processorNames = getMlInferencePipelineProcessorNamesFromPipelines(
'my-index',
mockGetPipeline
);
expect(processorNames).toEqual(expected);
});
it('should return an empty array for a missing @ml-inference pipeline', () => {
const processorNames = getMlInferencePipelineProcessorNamesFromPipelines(
'my-index-without-ml-inference-pipeline',
mockGetPipeline
);
expect(mockClient.ingest.getPipeline).toHaveBeenCalledWith({
id: 'my-index-without-ml-inference-pipeline@ml-inference',
});
expect(response).toEqual([]);
expect(processorNames).toEqual([]);
});
it('should return an empty array for a pipeline missing processors', () => {
const processorNames = getMlInferencePipelineProcessorNamesFromPipelines(
'my-index-without-ml-inference-pipeline',
{
'my-index-without-ml-inference-pipeline': {},
}
);
expect(processorNames).toEqual([]);
});
});
@ -322,6 +362,7 @@ describe('fetchPipelineProcessorInferenceData lib function', () => {
modelId: 'trained-model-id-1',
modelState: TrainedModelState.NotDeployed,
pipelineName: 'ml-inference-pipeline-1',
pipelineReferences: ['my-index@ml-inference', 'other-index@ml-inference'],
trainedModelName: 'trained-model-id-1',
types: [],
},
@ -329,6 +370,7 @@ describe('fetchPipelineProcessorInferenceData lib function', () => {
modelId: 'trained-model-id-2',
modelState: TrainedModelState.NotDeployed,
pipelineName: 'ml-inference-pipeline-2',
pipelineReferences: ['my-index@ml-inference'],
trainedModelName: 'trained-model-id-2',
types: [],
},
@ -336,7 +378,11 @@ describe('fetchPipelineProcessorInferenceData lib function', () => {
const response = await fetchPipelineProcessorInferenceData(
mockClient as unknown as ElasticsearchClient,
['ml-inference-pipeline-1', 'ml-inference-pipeline-2', 'non-ml-inference-pipeline']
['ml-inference-pipeline-1', 'ml-inference-pipeline-2', 'non-ml-inference-pipeline'],
{
'ml-inference-pipeline-1': ['my-index@ml-inference', 'other-index@ml-inference'],
'ml-inference-pipeline-2': ['my-index@ml-inference'],
}
);
expect(mockClient.ingest.getPipeline).toHaveBeenCalledWith({
@ -377,6 +423,7 @@ describe('getMlModelConfigsForModelIds lib function', () => {
modelId: 'trained-model-id-1',
modelState: TrainedModelState.Started,
pipelineName: '',
pipelineReferences: [],
trainedModelName: 'trained-model-id-1',
types: ['pytorch', 'ner'],
},
@ -384,6 +431,7 @@ describe('getMlModelConfigsForModelIds lib function', () => {
modelId: 'trained-model-id-2',
modelState: TrainedModelState.Started,
pipelineName: '',
pipelineReferences: [],
trainedModelName: 'trained-model-id-2',
types: ['pytorch', 'ner'],
},
@ -413,6 +461,7 @@ describe('getMlModelConfigsForModelIds lib function', () => {
modelId: 'trained-model-id-1',
modelState: TrainedModelState.Started,
pipelineName: '',
pipelineReferences: [],
trainedModelName: 'trained-model-id-1',
types: ['pytorch', 'ner'],
},
@ -420,6 +469,7 @@ describe('getMlModelConfigsForModelIds lib function', () => {
modelId: 'trained-model-id-2',
modelState: TrainedModelState.Started,
pipelineName: '',
pipelineReferences: [],
trainedModelName: 'trained-model-id-2',
types: ['pytorch', 'ner'],
},
@ -427,6 +477,7 @@ describe('getMlModelConfigsForModelIds lib function', () => {
modelId: undefined, // Redacted
modelState: TrainedModelState.Started,
pipelineName: '',
pipelineReferences: [],
trainedModelName: 'trained-model-id-3-in-other-space',
types: ['pytorch', 'ner'],
},
@ -483,6 +534,7 @@ describe('fetchAndAddTrainedModelData lib function', () => {
modelId: 'trained-model-id-1',
modelState: TrainedModelState.NotDeployed,
pipelineName: 'ml-inference-pipeline-1',
pipelineReferences: [],
trainedModelName: 'trained-model-id-1',
types: [],
},
@ -490,6 +542,7 @@ describe('fetchAndAddTrainedModelData lib function', () => {
modelId: 'trained-model-id-2',
modelState: TrainedModelState.NotDeployed,
pipelineName: 'ml-inference-pipeline-2',
pipelineReferences: [],
trainedModelName: 'trained-model-id-2',
types: [],
},
@ -497,6 +550,7 @@ describe('fetchAndAddTrainedModelData lib function', () => {
modelId: 'trained-model-id-3',
modelState: TrainedModelState.NotDeployed,
pipelineName: 'ml-inference-pipeline-3',
pipelineReferences: [],
trainedModelName: 'trained-model-id-3',
types: [],
},
@ -504,6 +558,7 @@ describe('fetchAndAddTrainedModelData lib function', () => {
modelId: 'trained-model-id-4',
modelState: TrainedModelState.NotDeployed,
pipelineName: 'ml-inference-pipeline-4',
pipelineReferences: [],
trainedModelName: 'trained-model-id-4',
types: [],
},
@ -514,6 +569,7 @@ describe('fetchAndAddTrainedModelData lib function', () => {
modelId: 'trained-model-id-1',
modelState: TrainedModelState.NotDeployed,
pipelineName: 'ml-inference-pipeline-1',
pipelineReferences: [],
trainedModelName: 'trained-model-id-1',
types: ['lang_ident', 'ner'],
},
@ -521,6 +577,7 @@ describe('fetchAndAddTrainedModelData lib function', () => {
modelId: 'trained-model-id-2',
modelState: TrainedModelState.Started,
pipelineName: 'ml-inference-pipeline-2',
pipelineReferences: [],
trainedModelName: 'trained-model-id-2',
types: ['pytorch', 'ner'],
},
@ -529,6 +586,7 @@ describe('fetchAndAddTrainedModelData lib function', () => {
modelState: TrainedModelState.Failed,
modelStateReason: 'something is wrong, boom',
pipelineName: 'ml-inference-pipeline-3',
pipelineReferences: [],
trainedModelName: 'trained-model-id-3',
types: ['pytorch', 'text_classification'],
},
@ -536,6 +594,7 @@ describe('fetchAndAddTrainedModelData lib function', () => {
modelId: 'trained-model-id-4',
modelState: TrainedModelState.Starting,
pipelineName: 'ml-inference-pipeline-4',
pipelineReferences: [],
trainedModelName: 'trained-model-id-4',
types: ['pytorch', 'fill_mask'],
},
@ -599,7 +658,7 @@ describe('fetchMlInferencePipelineProcessors lib function', () => {
);
expect(mockClient.ingest.getPipeline).toHaveBeenCalledWith({
id: 'index-with-no-ml-inference-pipeline@ml-inference',
id: '*@ml-inference',
});
expect(mockClient.ingest.getPipeline).toHaveBeenCalledTimes(1);
expect(mockClient.ml.getTrainedModels).toHaveBeenCalledTimes(0);
@ -626,7 +685,7 @@ describe('fetchMlInferencePipelineProcessors lib function', () => {
);
expect(mockClient.ingest.getPipeline).toHaveBeenCalledWith({
id: 'my-index@ml-inference',
id: '*@ml-inference',
});
expect(mockClient.ingest.getPipeline).toHaveBeenCalledWith({
id: 'ml-inference-pipeline-1',
@ -663,7 +722,7 @@ describe('fetchMlInferencePipelineProcessors lib function', () => {
);
expect(mockClient.ingest.getPipeline).toHaveBeenCalledWith({
id: 'my-index@ml-inference',
id: '*@ml-inference',
});
expect(mockClient.ingest.getPipeline).toHaveBeenCalledWith({
id: 'ml-inference-pipeline-1',
@ -707,7 +766,7 @@ describe('fetchMlInferencePipelineProcessors lib function', () => {
);
expect(mockClient.ingest.getPipeline).toHaveBeenCalledWith({
id: 'my-index@ml-inference',
id: '*@ml-inference',
});
expect(mockClient.ingest.getPipeline).toHaveBeenCalledWith({
id: 'ml-inference-pipeline-1,ml-inference-pipeline-3',


@ -5,6 +5,7 @@
* 2.0.
*/
import { IngestGetPipelineResponse } from '@elastic/elasticsearch/lib/api/types';
import { ElasticsearchClient } from '@kbn/core/server';
import { MlTrainedModels } from '@kbn/ml-plugin/server';
@ -16,31 +17,62 @@ export type InferencePipelineData = InferencePipeline & {
trainedModelName: string;
};
export const fetchMlInferencePipelineProcessorNames = async (
client: ElasticsearchClient,
indexName: string
): Promise<string[]> => {
export const fetchMlInferencePipelines = async (client: ElasticsearchClient) => {
try {
const mlInferencePipelineName = getInferencePipelineNameFromIndexName(indexName);
const {
[mlInferencePipelineName]: { processors: mlInferencePipelineProcessors = [] },
} = await client.ingest.getPipeline({
id: mlInferencePipelineName,
return await client.ingest.getPipeline({
id: getInferencePipelineNameFromIndexName('*'),
});
} catch (error) {
// The GET /_ingest/pipeline API returns an empty object on 404 Not Found. If there are no `@ml-inference`
// pipelines then return an empty record of pipelines
return {};
}
};
return mlInferencePipelineProcessors
.map((obj) => obj.pipeline?.name)
.filter((name): name is string => name !== undefined);
} catch (err) {
// The GET /_ingest/pipeline API returns an empty object on 404 Not Found. If someone provides
// a bad index name, catch the error and return an empty array of names.
export const getMlInferencePipelineProcessorNamesFromPipelines = (
indexName: string,
pipelines: IngestGetPipelineResponse
): string[] => {
const mlInferencePipelineName = getInferencePipelineNameFromIndexName(indexName);
if (pipelines?.[mlInferencePipelineName]?.processors === undefined) {
return [];
}
const {
[mlInferencePipelineName]: { processors: mlInferencePipelineProcessors = [] },
} = pipelines;
return mlInferencePipelineProcessors
.map((obj) => obj.pipeline?.name)
.filter((name): name is string => name !== undefined);
};
export const getProcessorPipelineMap = (
pipelines: IngestGetPipelineResponse
): Record<string, string[]> => {
const result: Record<string, string[]> = {};
const addPipelineToProcessorMap = (processorName: string, pipelineName: string) => {
if (processorName in result) {
result[processorName].push(pipelineName);
} else {
result[processorName] = [pipelineName];
}
};
Object.entries(pipelines).forEach(([name, pipeline]) =>
pipeline?.processors?.forEach((processor) => {
if (processor.pipeline?.name !== undefined) {
addPipelineToProcessorMap(processor.pipeline.name, name);
}
})
);
return result;
};
export const fetchPipelineProcessorInferenceData = async (
client: ElasticsearchClient,
mlInferencePipelineProcessorNames: string[]
mlInferencePipelineProcessorNames: string[],
pipelineProcessorsMap: Record<string, string[]>
): Promise<InferencePipelineData[]> => {
const mlInferencePipelineProcessorConfigs = await client.ingest.getPipeline({
id: mlInferencePipelineProcessorNames.join(),
@ -61,6 +93,7 @@ export const fetchPipelineProcessorInferenceData = async (
modelId: trainedModelName,
modelState: TrainedModelState.NotDeployed,
pipelineName: pipelineProcessorName,
pipelineReferences: pipelineProcessorsMap?.[pipelineProcessorName] ?? [],
trainedModelName,
types: [],
});
@ -96,6 +129,7 @@ export const getMlModelConfigsForModelIds = async (
modelId: modelNamesInCurrentSpace.includes(trainedModelName) ? trainedModelName : undefined,
modelState: TrainedModelState.NotDeployed,
pipelineName: '',
pipelineReferences: [],
trainedModelName,
types: getMlModelTypesForModelConfig(trainedModelData),
};
@ -155,9 +189,9 @@ export const fetchAndAddTrainedModelData = async (
return {
...data,
modelId,
types,
modelState,
modelStateReason,
types,
};
});
};
@ -171,9 +205,11 @@ export const fetchMlInferencePipelineProcessors = async (
return Promise.reject(new Error('Machine Learning is not enabled'));
}
const mlInferencePipelineProcessorNames = await fetchMlInferencePipelineProcessorNames(
client,
indexName
const allMlPipelines = await fetchMlInferencePipelines(client);
const pipelineProcessorsPipelineCountMap = getProcessorPipelineMap(allMlPipelines);
const mlInferencePipelineProcessorNames = getMlInferencePipelineProcessorNamesFromPipelines(
indexName,
allMlPipelines
);
// Elasticsearch's GET pipelines API call will return all of the pipeline data if no ids are
@ -183,7 +219,8 @@ export const fetchMlInferencePipelineProcessors = async (
const pipelineProcessorInferenceData = await fetchPipelineProcessorInferenceData(
client,
mlInferencePipelineProcessorNames
mlInferencePipelineProcessorNames,
pipelineProcessorsPipelineCountMap
);
// Elasticsearch's GET trained models and GET trained model stats API calls will return the