[Enterprise Search] Remove processors that copy ML inference results (#162826)

## Summary

This PR removes the auto-generated `set` and `remove` ingest processors
for the `text_embedding` and `text_classification` ML inference task types.
These processors copy the inference result to a top-level field, for
example `ml.inference.content_embedding.predicted_value` ->
`content_embedding`.
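
For reference, here is a minimal sketch of the processor pair being removed, reconstructed from the helpers deleted in the diff below. The `content_embedding` target field is illustrative; the `text_classification` variant additionally gates the copy on `prediction_probability > 0.5`:

```ts
// Sketch of the auto-generated processors this PR removes (text_embedding case).
const removedProcessors = [
  {
    // Cleared any pre-existing top-level target field before inference ran
    remove: { field: 'content_embedding', ignore_missing: true },
  },
  // ...the inference processor itself remains in the pipeline...
  {
    // Copied the prediction out of ml.inference.* into the top-level field
    set: {
      copy_from: 'ml.inference.content_embedding.predicted_value',
      description: "Copy the predicted_value to 'content_embedding'",
      field: 'content_embedding',
      if: "ctx?.ml?.inference != null && ctx.ml.inference['content_embedding'] != null",
    },
  },
];
```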

Reasons for the removal:
- It's a nice-to-have, not a critical feature
- It carries the risk of overwriting original fields in the document (see the sketch below)
- It's not documented
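
To illustrate the overwrite risk with a hypothetical document: if the source document already has a field matching the target field name, the copy silently clobbers it.

```ts
// Hypothetical document, before and after the (now removed) processors ran.
const before = {
  content_embedding: 'original source value', // pre-existing top-level field
  ml: { inference: { content_embedding: { predicted_value: [0.12, -0.48] } } },
};
const after = {
  content_embedding: [0.12, -0.48], // original value lost to the prediction
  ml: { inference: { content_embedding: { predicted_value: [0.12, -0.48] } } },
};
```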

In addition, we're deleting a related component
(`target_field_help_text.tsx`) that is now unused.

### Checklist
- [x] [Unit or functional
tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)
were updated or added to match the most common scenarios

---------

Co-authored-by: kibanamachine <42973632+kibanamachine@users.noreply.github.com>
Adam Demjen 2023-08-01 13:24:47 -04:00 committed by GitHub
parent dcfaf189f9
commit ba5dc1ebd1
6 changed files with 12 additions and 350 deletions


@@ -5,21 +5,14 @@
  * 2.0.
  */
-import {
-  IngestRemoveProcessor,
-  IngestSetProcessor,
-  MlTrainedModelConfig,
-  MlTrainedModelStats,
-} from '@elastic/elasticsearch/lib/api/types';
-import { SUPPORTED_PYTORCH_TASKS, BUILT_IN_MODEL_TAG } from '@kbn/ml-trained-models-utils';
+import { MlTrainedModelConfig, MlTrainedModelStats } from '@elastic/elasticsearch/lib/api/types';
+import { BUILT_IN_MODEL_TAG } from '@kbn/ml-trained-models-utils';
 import { MlInferencePipeline, TrainedModelState } from '../types/pipelines';
 import {
   generateMlInferencePipelineBody,
   getMlModelTypesForModelConfig,
-  getRemoveProcessorForInferenceType,
-  getSetProcessorForInferenceType,
   parseMlInferenceParametersFromPipeline,
   parseModelStateFromStats,
   parseModelStateReasonFromStats,
@@ -64,83 +57,13 @@ describe('getMlModelTypesForModelConfig lib function', () => {
   });
 });
-describe('getRemoveProcessorForInferenceType lib function', () => {
-  const targetField = 'ml.inference.target';
-  it('should return expected value for TEXT_CLASSIFICATION', () => {
-    const inferenceType = SUPPORTED_PYTORCH_TASKS.TEXT_CLASSIFICATION;
-    const expected: IngestRemoveProcessor = {
-      field: targetField,
-      ignore_missing: true,
-    };
-    expect(getRemoveProcessorForInferenceType(targetField, inferenceType)).toEqual(expected);
-  });
-  it('should return expected value for TEXT_EMBEDDING', () => {
-    const inferenceType = SUPPORTED_PYTORCH_TASKS.TEXT_EMBEDDING;
-    const expected: IngestRemoveProcessor = {
-      field: targetField,
-      ignore_missing: true,
-    };
-    expect(getRemoveProcessorForInferenceType(targetField, inferenceType)).toEqual(expected);
-  });
-  it('should return undefined for unknown inferenceType', () => {
-    const inferenceType = 'wrongInferenceType';
-    expect(getRemoveProcessorForInferenceType(targetField, inferenceType)).toBeUndefined();
-  });
-});
-describe('getSetProcessorForInferenceType lib function', () => {
-  const targetField = 'dest';
-  it('should return expected value for TEXT_CLASSIFICATION', () => {
-    const inferenceType = SUPPORTED_PYTORCH_TASKS.TEXT_CLASSIFICATION;
-    const expected: IngestSetProcessor = {
-      copy_from: 'ml.inference.dest.predicted_value',
-      description:
-        "Copy the predicted_value to 'dest' if the prediction_probability is greater than 0.5",
-      field: targetField,
-      if: "ctx?.ml?.inference != null && ctx.ml.inference['dest'] != null && ctx.ml.inference['dest'].prediction_probability > 0.5",
-      value: undefined,
-    };
-    expect(getSetProcessorForInferenceType(targetField, inferenceType)).toEqual(expected);
-  });
-  it('should return expected value for TEXT_EMBEDDING', () => {
-    const inferenceType = SUPPORTED_PYTORCH_TASKS.TEXT_EMBEDDING;
-    const expected: IngestSetProcessor = {
-      copy_from: 'ml.inference.dest.predicted_value',
-      description: "Copy the predicted_value to 'dest'",
-      field: targetField,
-      if: "ctx?.ml?.inference != null && ctx.ml.inference['dest'] != null",
-      value: undefined,
-    };
-    expect(getSetProcessorForInferenceType(targetField, inferenceType)).toEqual(expected);
-  });
-  it('should return undefined for unknown inferenceType', () => {
-    const inferenceType = 'wrongInferenceType';
-    expect(getSetProcessorForInferenceType(targetField, inferenceType)).toBeUndefined();
-  });
-});
 describe('generateMlInferencePipelineBody lib function', () => {
   const expected: MlInferencePipeline = {
     description: 'my-description',
     processors: [
       {
         remove: {
-          field: 'ml.inference.my-target-field',
+          field: 'my-target-field',
           ignore_missing: true,
         },
       },
@@ -166,7 +89,7 @@ describe('generateMlInferencePipelineBody lib function', () => {
             },
           },
         ],
-        target_field: 'ml.inference.my-target-field',
+        target_field: 'my-target-field',
       },
     },
     {
@@ -197,121 +120,15 @@ describe('generateMlInferencePipelineBody lib function', () => {
     expect(actual).toEqual(expected);
   });
-  it('should return something expected with specific processors', () => {
-    const mockTextClassificationModel: MlTrainedModelConfig = {
-      ...mockModel,
-      ...{ inference_config: { text_classification: {} } },
-    };
-    const actual: MlInferencePipeline = generateMlInferencePipelineBody({
-      description: 'my-description',
-      model: mockTextClassificationModel,
-      pipelineName: 'my-pipeline',
-      fieldMappings: [{ sourceField: 'my-source-field', targetField: 'my-target-field' }],
-    });
-    expect(actual).toEqual(
-      expect.objectContaining({
-        description: expect.any(String),
-        processors: expect.arrayContaining([
-          expect.objectContaining({
-            remove: {
-              field: 'my-target-field',
-              ignore_missing: true,
-            },
-          }),
-          expect.objectContaining({
-            set: {
-              copy_from: 'ml.inference.my-target-field.predicted_value',
-              description:
-                "Copy the predicted_value to 'my-target-field' if the prediction_probability is greater than 0.5",
-              field: 'my-target-field',
-              if: "ctx?.ml?.inference != null && ctx.ml.inference['my-target-field'] != null && ctx.ml.inference['my-target-field'].prediction_probability > 0.5",
-            },
-          }),
-        ]),
-      })
-    );
-  });
-  it('should return something that safely removes redundant prefixes', () => {
-    const mockTextClassificationModel: MlTrainedModelConfig = {
-      ...mockModel,
-      ...{ inference_config: { text_classification: {} } },
-    };
-    const actual: MlInferencePipeline = generateMlInferencePipelineBody({
-      description: 'my-description',
-      model: mockTextClassificationModel,
-      pipelineName: 'my-pipeline',
-      fieldMappings: [
-        { sourceField: 'my-source-field_1', targetField: 'ml.inference.my-source-field_expanded' },
-        { sourceField: 'my-source-field_2', targetField: 'my-source-ml.inference-field_expanded' },
-        {
-          sourceField: 'my-source-field_3',
-          targetField: 'ml.inference.my-source-2-ml.inference-field_expanded',
-        },
-      ],
-    });
-    expect(actual).toEqual(
-      expect.objectContaining({
-        description: expect.any(String),
-        processors: expect.arrayContaining([
-          expect.objectContaining({
-            remove: {
-              field: 'ml.inference.my-source-field_expanded',
-              ignore_missing: true,
-            },
-          }),
-          expect.objectContaining({
-            remove: {
-              field: 'ml.inference.my-source-ml.inference-field_expanded',
-              ignore_missing: true,
-            },
-          }),
-          expect.objectContaining({
-            remove: {
-              field: 'ml.inference.my-source-2-ml.inference-field_expanded',
-              ignore_missing: true,
-            },
-          }),
-          expect.objectContaining({
-            inference: expect.objectContaining({
-              field_map: {
-                'my-source-field_1': 'MODEL_INPUT_FIELD',
-              },
-              target_field: 'ml.inference.my-source-field_expanded',
-            }),
-          }),
-          expect.objectContaining({
-            inference: expect.objectContaining({
-              field_map: {
-                'my-source-field_2': 'MODEL_INPUT_FIELD',
-              },
-              target_field: 'ml.inference.my-source-ml.inference-field_expanded',
-            }),
-          }),
-          expect.objectContaining({
-            inference: expect.objectContaining({
-              field_map: {
-                'my-source-field_3': 'MODEL_INPUT_FIELD',
-              },
-              target_field: 'ml.inference.my-source-2-ml.inference-field_expanded',
-            }),
-          }),
-        ]),
-      })
-    );
-  });
   it('should return something expected with multiple fields', () => {
     const actual: MlInferencePipeline = generateMlInferencePipelineBody({
       description: 'my-description',
       model: mockModel,
       pipelineName: 'my-pipeline',
       fieldMappings: [
-        { sourceField: 'my-source-field1', targetField: 'my-destination-field1' },
-        { sourceField: 'my-source-field2', targetField: 'my-destination-field2' },
-        { sourceField: 'my-source-field3', targetField: 'my-destination-field3' },
+        { sourceField: 'my-source-field1', targetField: 'my-target-field1' },
+        { sourceField: 'my-source-field2', targetField: 'my-target-field2' },
+        { sourceField: 'my-source-field3', targetField: 'my-target-field3' },
       ],
     });
@@ -320,17 +137,17 @@ describe('generateMlInferencePipelineBody lib function', () => {
         processors: expect.arrayContaining([
           {
             remove: expect.objectContaining({
-              field: 'ml.inference.my-destination-field1',
+              field: 'my-target-field1',
             }),
           },
           {
             remove: expect.objectContaining({
-              field: 'ml.inference.my-destination-field2',
+              field: 'my-target-field2',
             }),
           },
           {
             remove: expect.objectContaining({
-              field: 'ml.inference.my-destination-field3',
+              field: 'my-target-field3',
             }),
           },
           {


@@ -8,8 +8,6 @@
 import {
   IngestInferenceProcessor,
   IngestPipeline,
-  IngestRemoveProcessor,
-  IngestSetProcessor,
   MlTrainedModelConfig,
   MlTrainedModelStats,
 } from '@elastic/elasticsearch/lib/api/types';
@@ -57,7 +55,6 @@ export const generateMlInferencePipelineBody = ({
   model,
   pipelineName,
 }: MlInferencePipelineParams): MlInferencePipeline => {
-  const inferenceType = Object.keys(model.inference_config || {})[0];
   const pipelineDefinition: MlInferencePipeline = {
     description: description ?? '',
     processors: [],
@@ -67,7 +64,6 @@ export const generateMlInferencePipelineBody = ({
   pipelineDefinition.processors = [
     // Add remove and inference processors
     ...fieldMappings.flatMap(({ sourceField, targetField }) => {
-      const remove = getRemoveProcessorForInferenceType(targetField, inferenceType);
       const inference = getInferenceProcessor(
         sourceField,
         targetField,
@@ -79,11 +75,10 @@ export const generateMlInferencePipelineBody = ({
       return [
         {
           remove: {
-            field: getMlInferencePrefixedFieldName(targetField),
+            field: targetField,
             ignore_missing: true,
           },
         },
-        ...(remove ? [{ remove }] : []),
        { inference },
      ];
    }),
@@ -101,12 +96,6 @@ export const generateMlInferencePipelineBody = ({
         ],
       },
     },
-    // Add set processors
-    ...fieldMappings.flatMap(({ targetField }) => {
-      const set = getSetProcessorForInferenceType(targetField, inferenceType);
-      return set ? [{ set }] : [];
-    }),
   ];
   return pipelineDefinition;
@@ -144,51 +133,10 @@ export const getInferenceProcessor = (
         },
       },
     ],
-    target_field: getMlInferencePrefixedFieldName(targetField),
+    target_field: targetField,
   };
 };
-export const getSetProcessorForInferenceType = (
-  targetField: string,
-  inferenceType: string
-): IngestSetProcessor | undefined => {
-  let set: IngestSetProcessor | undefined;
-  if (inferenceType === SUPPORTED_PYTORCH_TASKS.TEXT_CLASSIFICATION) {
-    set = {
-      copy_from: `${getMlInferencePrefixedFieldName(targetField)}.predicted_value`,
-      description: `Copy the predicted_value to '${targetField}' if the prediction_probability is greater than 0.5`,
-      field: targetField,
-      if: `ctx?.ml?.inference != null && ctx.ml.inference['${targetField}'] != null && ctx.ml.inference['${targetField}'].prediction_probability > 0.5`,
-      value: undefined,
-    };
-  } else if (inferenceType === SUPPORTED_PYTORCH_TASKS.TEXT_EMBEDDING) {
-    set = {
-      copy_from: `${getMlInferencePrefixedFieldName(targetField)}.predicted_value`,
-      description: `Copy the predicted_value to '${targetField}'`,
-      field: targetField,
-      if: `ctx?.ml?.inference != null && ctx.ml.inference['${targetField}'] != null`,
-      value: undefined,
-    };
-  }
-  return set;
-};
-export const getRemoveProcessorForInferenceType = (
-  targetField: string,
-  inferenceType: string
-): IngestRemoveProcessor | undefined => {
-  if (
-    inferenceType === SUPPORTED_PYTORCH_TASKS.TEXT_CLASSIFICATION ||
-    inferenceType === SUPPORTED_PYTORCH_TASKS.TEXT_EMBEDDING
-  ) {
-    return {
-      field: targetField,
-      ignore_missing: true,
-    };
-  }
-};
 /**
  * Parses model types list from the given configuration of a trained machine learning model
  * @param trainedModel configuration for a trained machine learning model


@@ -1,91 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License
- * 2.0; you may not use this file except in compliance with the Elastic License
- * 2.0.
- */
-import React from 'react';
-import { EuiText } from '@elastic/eui';
-import { i18n } from '@kbn/i18n';
-import { FormattedMessage, FormattedNumber } from '@kbn/i18n-react';
-import { SUPPORTED_PYTORCH_TASKS } from '@kbn/ml-trained-models-utils';
-import { getMlModelTypesForModelConfig } from '../../../../../../../common/ml_inference_pipeline';
-import { TrainedModel } from '../../../../api/ml_models/ml_trained_models_logic';
-import { getMLType } from '../../../shared/ml_inference/utils';
-export interface TargetFieldHelpTextProps {
-  model?: TrainedModel;
-  pipelineName: string;
-  targetField: string;
-}
-export const TargetFieldHelpText: React.FC<TargetFieldHelpTextProps> = ({
-  pipelineName,
-  targetField,
-  model,
-}) => {
-  const baseText = targetField
-    ? i18n.translate(
-        'xpack.enterpriseSearch.content.indices.pipelines.addInferencePipelineModal.steps.fields.targetField.helpText.userProvided',
-        {
-          defaultMessage:
-            'This names the field that holds the inference result. It will be prefixed with "ml.inference", ml.inference.{targetField}',
-          values: {
-            targetField,
-          },
-        }
-      )
-    : i18n.translate(
-        'xpack.enterpriseSearch.content.indices.pipelines.addInferencePipelineModal.steps.fields.targetField.helpText.default',
-        {
-          defaultMessage:
-            'This names the field that holds the inference result. It will be prefixed with "ml.inference", if not set it will be defaulted to "ml.inference.{pipelineName}"',
-          values: {
-            pipelineName: pipelineName || '<Pipeline Name>',
-          },
-        }
-      );
-  const fieldName = targetField || pipelineName || '<Pipeline Name>';
-  const modelType = model ? getMLType(getMlModelTypesForModelConfig(model)) : '';
-  if (modelType === SUPPORTED_PYTORCH_TASKS.TEXT_CLASSIFICATION) {
-    return (
-      <EuiText size="xs">
-        <p>{baseText}</p>
-        <p>
-          <FormattedMessage
-            id="xpack.enterpriseSearch.content.indices.pipelines.addInferencePipelineModal.steps.fields.targetField.helpText.textClassificationModel"
-            defaultMessage='Additionally the predicted_value will be copied to "{fieldName}", if the prediction_probability is greater than {probabilityThreshold}'
-            values={{
-              fieldName,
-              probabilityThreshold: <FormattedNumber value={0.5} />,
-            }}
-          />
-        </p>
-      </EuiText>
-    );
-  }
-  if (modelType === SUPPORTED_PYTORCH_TASKS.TEXT_EMBEDDING) {
-    return (
-      <EuiText size="xs">
-        <p>{baseText}</p>
-        <p>
-          {i18n.translate(
-            'xpack.enterpriseSearch.content.indices.pipelines.addInferencePipelineModal.steps.fields.targetField.helpText.textEmbeddingModel',
-            {
-              defaultMessage: 'Additionally the predicted_value will be copied to "{fieldName}"',
-              values: {
-                fieldName,
-              },
-            }
-          )}
-        </p>
-      </EuiText>
-    );
-  }
-  return <EuiText size="xs">{baseText}</EuiText>;
-};


@@ -12147,10 +12147,6 @@
     "xpack.enterpriseSearch.content.indices.deleteIndex.successToast.title": "Votre index {indexName} et toute configuration d'ingestion associée ont été supprimés avec succès",
     "xpack.enterpriseSearch.content.indices.pipelines.addInferencePipelineModal.noModels.description": "Aucun de vos modèles entraînés de Machine Learning ne peut être utilisé par un pipeline d'inférence. {documentationLink}",
     "xpack.enterpriseSearch.content.indices.pipelines.addInferencePipelineModal.steps.configure.name.helpText": "Les noms de pipeline sont uniques dans un déploiement, et ils peuvent uniquement contenir des lettres, des chiffres, des traits de soulignement et des traits d'union. Cela créera un pipeline nommé {pipelineName}.",
-    "xpack.enterpriseSearch.content.indices.pipelines.addInferencePipelineModal.steps.fields.targetField.helpText.default": "Cela attribue un nom au champ qui contient le résultat d'inférence. Votre nom de champ recevra le préfixe \"ml.inference.\". S'il n'est pas défini, le nom par défaut sera \"ml.inference.{pipelineName}\"",
-    "xpack.enterpriseSearch.content.indices.pipelines.addInferencePipelineModal.steps.fields.targetField.helpText.textClassificationModel": "De plus, la valeur_prévue (predicted_value) sera copiée sur \"{fieldName}\" si la probabilité de prédiction (prediction_probability) est supérieure à {probabilityThreshold}",
-    "xpack.enterpriseSearch.content.indices.pipelines.addInferencePipelineModal.steps.fields.targetField.helpText.textEmbeddingModel": "De plus, la valeur_prévue (predicted_value) sera copiée sur \"{fieldName}\"",
-    "xpack.enterpriseSearch.content.indices.pipelines.addInferencePipelineModal.steps.fields.targetField.helpText.userProvided": "Cela attribue un nom au champ qui contient le résultat d'inférence. \"ml.inference\" sera ajouté comme préfixe, ml.inference.{targetField}",
     "xpack.enterpriseSearch.content.indices.pipelines.ingestPipelinesCard.customDescription": "Pipeline d'ingestion personnalisé pour {indexName}",
     "xpack.enterpriseSearch.content.indices.pipelines.ingestPipelinesCard.processorsDescription": "{processorsCount} processeurs",
     "xpack.enterpriseSearch.content.indices.pipelines.mlInferencePipelines.subtitleAPIindex": "Les pipelines d'inférence seront exécutés en tant que processeurs à partir du pipeline d'ingestion Enterprise Search. Afin d'utiliser ces pipelines dans des index basés sur des API, vous devrez référencer le pipeline {pipelineName} dans vos requêtes d'API.",


@@ -12161,10 +12161,6 @@
     "xpack.enterpriseSearch.content.indices.deleteIndex.successToast.title": "インデックス{indexName}と関連付けられたすべての統合構成が正常に削除されました",
     "xpack.enterpriseSearch.content.indices.pipelines.addInferencePipelineModal.noModels.description": "推論パイプラインで使用できる学習済み機械学習モデルがありません。{documentationLink}",
     "xpack.enterpriseSearch.content.indices.pipelines.addInferencePipelineModal.steps.configure.name.helpText": "パイプライン名はデプロイ内で一意であり、文字、数字、アンダースコア、ハイフンのみを使用できます。これにより、{pipelineName}という名前のパイプラインが作成されます。",
-    "xpack.enterpriseSearch.content.indices.pipelines.addInferencePipelineModal.steps.fields.targetField.helpText.default": "これは、推論結果を保持するフィールドの名前を指定します。\"ml.inference.\"というプレフィックスが付きます。設定されていない場合は、デフォルトで\"ml.inference.{pipelineName}\"となります",
-    "xpack.enterpriseSearch.content.indices.pipelines.addInferencePipelineModal.steps.fields.targetField.helpText.textClassificationModel": "さらに、predicted_probabilityが{probabilityThreshold}より大きい場合、predicted_valueは\"{fieldName}\"にコピーされます。",
-    "xpack.enterpriseSearch.content.indices.pipelines.addInferencePipelineModal.steps.fields.targetField.helpText.textEmbeddingModel": "さらにpredicted_valueは\"{fieldName}\"にコピーされます。",
-    "xpack.enterpriseSearch.content.indices.pipelines.addInferencePipelineModal.steps.fields.targetField.helpText.userProvided": "これは、推論結果を保持するフィールドの名前を指定します。プレフィックスに\"ml.inference\"、ml.inference.{targetField}が付きます",
     "xpack.enterpriseSearch.content.indices.pipelines.ingestPipelinesCard.customDescription": "{indexName}のカスタムインジェストパイプライン",
     "xpack.enterpriseSearch.content.indices.pipelines.ingestPipelinesCard.processorsDescription": "{processorsCount}プロセッサー",
     "xpack.enterpriseSearch.content.indices.pipelines.mlInferencePipelines.subtitleAPIindex": "推論パイプラインは、エンタープライズサーチインジェストパイプラインからのプロセッサーとして実行されます。APIベースのインデックスでこれらのパイプラインを使用するには、APIリクエストで{pipelineName}パイプラインを参照する必要があります。",


@@ -12161,10 +12161,6 @@
     "xpack.enterpriseSearch.content.indices.deleteIndex.successToast.title": "您的索引 {indexName} 和任何关联的采集配置已成功删除",
     "xpack.enterpriseSearch.content.indices.pipelines.addInferencePipelineModal.noModels.description": "您没有可供推理管道使用的已训练 Machine Learning 模型。{documentationLink}",
     "xpack.enterpriseSearch.content.indices.pipelines.addInferencePipelineModal.steps.configure.name.helpText": "管道名称在部署内唯一,并且只能包含字母、数字、下划线和连字符。这会创建名为 {pipelineName} 的管道。",
-    "xpack.enterpriseSearch.content.indices.pipelines.addInferencePipelineModal.steps.fields.targetField.helpText.default": "这会命名存放推理结果的字段。它将加有“ml.inference”前缀如果未设置将默认前缀为“ml.inference.{pipelineName}”",
-    "xpack.enterpriseSearch.content.indices.pipelines.addInferencePipelineModal.steps.fields.targetField.helpText.textClassificationModel": "此外,如果 prediction_probability 大于 {probabilityThreshold},则会将 predicted_value 复制到“{fieldName}”",
-    "xpack.enterpriseSearch.content.indices.pipelines.addInferencePipelineModal.steps.fields.targetField.helpText.textEmbeddingModel": "此外,还会将 predicted_value 复制到“{fieldName}”",
-    "xpack.enterpriseSearch.content.indices.pipelines.addInferencePipelineModal.steps.fields.targetField.helpText.userProvided": "这会命名存放推理结果的字段。它将加有“ml.inference”、ml.inference.{targetField} 前缀",
     "xpack.enterpriseSearch.content.indices.pipelines.ingestPipelinesCard.customDescription": "{indexName} 的定制采集管道",
     "xpack.enterpriseSearch.content.indices.pipelines.ingestPipelinesCard.processorsDescription": "{processorsCount} 个处理器",
    "xpack.enterpriseSearch.content.indices.pipelines.mlInferencePipelines.subtitleAPIindex": "推理管道将作为处理器从 Enterprise Search 采集管道中运行。要在基于 API 的索引上使用这些管道,您需要在 API 请求中引用 {pipelineName} 管道。",