diff --git a/docs/changelog/90460.yaml b/docs/changelog/90460.yaml new file mode 100644 index 000000000000..57e70ac3aebe --- /dev/null +++ b/docs/changelog/90460.yaml @@ -0,0 +1,10 @@ +pr: 90460 +summary: Deprecate 'remove_binary' default of false for ingest attachment processor +area: Ingest Node +type: deprecation +issues: [] +deprecation: + title: Deprecate 'remove_binary' default of false for ingest attachment processor + area: CRUD + details: The default "remove_binary" option for the attachment processor will be changed from false to true in a later Elasticsearch release. This means that the binary file sent to Elasticsearch will not be retained. + impact: Users should update the "remove_binary" option to be explicitly true or false, instead of relying on the default value, so that no default value changes will affect Elasticsearch. diff --git a/docs/reference/ingest/processors/attachment.asciidoc b/docs/reference/ingest/processors/attachment.asciidoc index 13ad497fafa5..fd2866906c1d 100644 --- a/docs/reference/ingest/processors/attachment.asciidoc +++ b/docs/reference/ingest/processors/attachment.asciidoc @@ -57,7 +57,8 @@ PUT _ingest/pipeline/attachment "processors" : [ { "attachment" : { - "field" : "data" + "field" : "data", + "remove_binary": false } } ] @@ -141,7 +142,8 @@ PUT _ingest/pipeline/attachment { "attachment" : { "field" : "data", - "properties": [ "content", "title" ] + "properties": [ "content", "title" ], + "remove_binary": false } } ] @@ -167,7 +169,8 @@ PUT _ingest/pipeline/cbor-attachment "processors" : [ { "attachment" : { - "field" : "data" + "field" : "data", + "remove_binary": false } } ] @@ -222,7 +225,8 @@ PUT _ingest/pipeline/attachment "attachment" : { "field" : "data", "indexed_chars" : 11, - "indexed_chars_field" : "max_size" + "indexed_chars_field" : "max_size", + "remove_binary": false } } ] @@ -269,7 +273,8 @@ PUT _ingest/pipeline/attachment "attachment" : { "field" : "data", "indexed_chars" : 11, - "indexed_chars_field" : "max_size" + "indexed_chars_field" : "max_size", + "remove_binary": false } } ] @@ -352,7 +357,8 @@ PUT _ingest/pipeline/attachment "processor": { "attachment": { "target_field": "_ingest._value.attachment", - "field": "_ingest._value.data" + "field": "_ingest._value.data", + "remove_binary": false } } } diff --git a/modules/ingest-attachment/src/main/java/org/elasticsearch/ingest/attachment/AttachmentProcessor.java b/modules/ingest-attachment/src/main/java/org/elasticsearch/ingest/attachment/AttachmentProcessor.java index 504d5e302f76..362c0c188726 100644 --- a/modules/ingest-attachment/src/main/java/org/elasticsearch/ingest/attachment/AttachmentProcessor.java +++ b/modules/ingest-attachment/src/main/java/org/elasticsearch/ingest/attachment/AttachmentProcessor.java @@ -14,7 +14,10 @@ import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.Office; import org.apache.tika.metadata.TikaCoreProperties; import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.Version; import org.elasticsearch.common.Strings; +import org.elasticsearch.common.logging.DeprecationCategory; +import org.elasticsearch.common.logging.DeprecationLogger; import org.elasticsearch.ingest.AbstractProcessor; import org.elasticsearch.ingest.IngestDocument; import org.elasticsearch.ingest.Processor; @@ -30,16 +33,18 @@ import java.util.Set; import static org.elasticsearch.ingest.ConfigurationUtils.newConfigurationException; import static org.elasticsearch.ingest.ConfigurationUtils.readBooleanProperty; import static org.elasticsearch.ingest.ConfigurationUtils.readIntProperty; +import static org.elasticsearch.ingest.ConfigurationUtils.readOptionalBooleanProperty; import static org.elasticsearch.ingest.ConfigurationUtils.readOptionalList; import static org.elasticsearch.ingest.ConfigurationUtils.readOptionalStringProperty; import static org.elasticsearch.ingest.ConfigurationUtils.readStringProperty; public final class AttachmentProcessor extends AbstractProcessor { - public static final String TYPE = "attachment"; - + private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(AttachmentProcessor.class); private static final int NUMBER_OF_CHARS_INDEXED = 100000; + public static final String TYPE = "attachment"; + private final String field; private final String targetField; private final Set properties; @@ -221,6 +226,15 @@ public final class AttachmentProcessor extends AbstractProcessor { static final Set DEFAULT_PROPERTIES = EnumSet.allOf(Property.class); + static { + if (Version.CURRENT.major >= 9) { + throw new IllegalStateException( + "[poison pill] update the [remove_binary] default to be 'true' assuming " + + "enough time has passed. Deprecated in September 2022." + ); + } + } + @Override public AttachmentProcessor create( Map registry, @@ -235,7 +249,17 @@ public final class AttachmentProcessor extends AbstractProcessor { int indexedChars = readIntProperty(TYPE, processorTag, config, "indexed_chars", NUMBER_OF_CHARS_INDEXED); boolean ignoreMissing = readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false); String indexedCharsField = readOptionalStringProperty(TYPE, processorTag, config, "indexed_chars_field"); - boolean removeBinary = readBooleanProperty(TYPE, processorTag, config, "remove_binary", false); + Boolean removeBinary = readOptionalBooleanProperty(TYPE, processorTag, config, "remove_binary"); + if (removeBinary == null) { + DEPRECATION_LOGGER.warn( + DeprecationCategory.PARSING, + "attachment-remove-binary", + "The default [remove_binary] value of 'false' is deprecated and will be " + + "set to 'true' in a future release. Set [remove_binary] explicitly to " + + "'true' or 'false' to ensure no behavior change." + ); + removeBinary = false; + } final Set properties; if (propertyNames != null) { diff --git a/modules/ingest-attachment/src/test/java/org/elasticsearch/ingest/attachment/AttachmentProcessorFactoryTests.java b/modules/ingest-attachment/src/test/java/org/elasticsearch/ingest/attachment/AttachmentProcessorFactoryTests.java index 45c3407020dc..0a18504e4505 100644 --- a/modules/ingest-attachment/src/test/java/org/elasticsearch/ingest/attachment/AttachmentProcessorFactoryTests.java +++ b/modules/ingest-attachment/src/test/java/org/elasticsearch/ingest/attachment/AttachmentProcessorFactoryTests.java @@ -41,6 +41,12 @@ public class AttachmentProcessorFactoryTests extends ESTestCase { assertThat(processor.getTargetField(), equalTo("attachment")); assertThat(processor.getProperties(), sameInstance(AttachmentProcessor.Factory.DEFAULT_PROPERTIES)); assertFalse(processor.isIgnoreMissing()); + + assertWarnings( + "The default [remove_binary] value of 'false' is deprecated " + + "and will be set to 'true' in a future release. Set [remove_binary] explicitly to 'true'" + + " or 'false' to ensure no behavior change." + ); } public void testConfigureIndexedChars() throws Exception { @@ -54,6 +60,12 @@ public class AttachmentProcessorFactoryTests extends ESTestCase { assertThat(processor.getTag(), equalTo(processorTag)); assertThat(processor.getIndexedChars(), is(indexedChars)); assertFalse(processor.isIgnoreMissing()); + + assertWarnings( + "The default [remove_binary] value of 'false' is deprecated " + + "and will be set to 'true' in a future release. Set [remove_binary] explicitly to 'true'" + + " or 'false' to ensure no behavior change." + ); } public void testBuildTargetField() throws Exception { @@ -64,6 +76,12 @@ public class AttachmentProcessorFactoryTests extends ESTestCase { assertThat(processor.getField(), equalTo("_field")); assertThat(processor.getTargetField(), equalTo("_field")); assertFalse(processor.isIgnoreMissing()); + + assertWarnings( + "The default [remove_binary] value of 'false' is deprecated " + + "and will be set to 'true' in a future release. Set [remove_binary] explicitly to 'true'" + + " or 'false' to ensure no behavior change." + ); } public void testBuildFields() throws Exception { @@ -82,6 +100,12 @@ public class AttachmentProcessorFactoryTests extends ESTestCase { assertThat(processor.getField(), equalTo("_field")); assertThat(processor.getProperties(), equalTo(properties)); assertFalse(processor.isIgnoreMissing()); + + assertWarnings( + "The default [remove_binary] value of 'false' is deprecated " + + "and will be set to 'true' in a future release. Set [remove_binary] explicitly to 'true'" + + " or 'false' to ensure no behavior change." + ); } public void testBuildIllegalFieldOption() throws Exception { @@ -108,6 +132,12 @@ public class AttachmentProcessorFactoryTests extends ESTestCase { } catch (ElasticsearchParseException e) { assertThat(e.getMessage(), equalTo("[properties] property isn't a list, but of type [java.lang.String]")); } + + assertWarnings( + "The default [remove_binary] value of 'false' is deprecated " + + "and will be set to 'true' in a future release. Set [remove_binary] explicitly to 'true'" + + " or 'false' to ensure no behavior change." + ); } public void testIgnoreMissing() throws Exception { @@ -123,6 +153,12 @@ public class AttachmentProcessorFactoryTests extends ESTestCase { assertThat(processor.getTargetField(), equalTo("attachment")); assertThat(processor.getProperties(), sameInstance(AttachmentProcessor.Factory.DEFAULT_PROPERTIES)); assertTrue(processor.isIgnoreMissing()); + + assertWarnings( + "The default [remove_binary] value of 'false' is deprecated " + + "and will be set to 'true' in a future release. Set [remove_binary] explicitly to 'true'" + + " or 'false' to ensure no behavior change." + ); } public void testRemoveBinary() throws Exception { diff --git a/modules/ingest-attachment/src/yamlRestTest/resources/rest-api-spec/test/ingest_attachment/20_attachment_processor.yml b/modules/ingest-attachment/src/yamlRestTest/resources/rest-api-spec/test/ingest_attachment/20_attachment_processor.yml index 714a434775db..55b6b34a04e8 100644 --- a/modules/ingest-attachment/src/yamlRestTest/resources/rest-api-spec/test/ingest_attachment/20_attachment_processor.yml +++ b/modules/ingest-attachment/src/yamlRestTest/resources/rest-api-spec/test/ingest_attachment/20_attachment_processor.yml @@ -9,7 +9,8 @@ "processors": [ { "attachment" : { - "field" : "field1" + "field" : "field1", + "remove_binary": false } } ] @@ -50,7 +51,8 @@ { "attachment" : { "field" : "field1", - "properties" : ["language"] + "properties" : ["language"], + "remove_binary": false } } ] @@ -84,7 +86,8 @@ { "attachment" : { "field" : "field1", - "indexed_chars": 30 + "indexed_chars": 30, + "remove_binary": true } } ] @@ -120,7 +123,8 @@ "attachment" : { "field" : "field1", "indexed_chars": 30, - "indexed_chars_field": "max_size" + "indexed_chars_field": "max_size", + "remove_binary": true } } ] diff --git a/modules/ingest-attachment/src/yamlRestTest/resources/rest-api-spec/test/ingest_attachment/30_files_supported.yml b/modules/ingest-attachment/src/yamlRestTest/resources/rest-api-spec/test/ingest_attachment/30_files_supported.yml index 530345e3a355..8a27108cf9a8 100644 --- a/modules/ingest-attachment/src/yamlRestTest/resources/rest-api-spec/test/ingest_attachment/30_files_supported.yml +++ b/modules/ingest-attachment/src/yamlRestTest/resources/rest-api-spec/test/ingest_attachment/30_files_supported.yml @@ -12,7 +12,8 @@ "processors": [ { "attachment" : { - "field" : "field1" + "field" : "field1", + "remove_binary": true } } ] @@ -55,7 +56,8 @@ "processors": [ { "attachment" : { - "field" : "field1" + "field" : "field1", + "remove_binary": true } } ] diff --git a/server/src/main/java/org/elasticsearch/ingest/ConfigurationUtils.java b/server/src/main/java/org/elasticsearch/ingest/ConfigurationUtils.java index 5f9c9fadc841..654b0adc9824 100644 --- a/server/src/main/java/org/elasticsearch/ingest/ConfigurationUtils.java +++ b/server/src/main/java/org/elasticsearch/ingest/ConfigurationUtils.java @@ -14,6 +14,7 @@ import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.util.Maps; import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; +import org.elasticsearch.core.Nullable; import org.elasticsearch.script.Script; import org.elasticsearch.script.ScriptService; import org.elasticsearch.script.ScriptType; @@ -216,6 +217,17 @@ public final class ConfigurationUtils { } } + @Nullable + public static Boolean readOptionalBooleanProperty( + String processorType, + String processorTag, + Map configuration, + String propertyName + ) { + Object value = configuration.remove(propertyName); + return readBoolean(processorType, processorTag, propertyName, value); + } + private static Boolean readBoolean(String processorType, String processorTag, String propertyName, Object value) { if (value == null) { return null;