Deprecate 'remove_binary' default of false for ingest attachment processor (#90460)

This commit adds deprecation warning for when the `remove_binary`
setting is unset. In the future we want to change the default to `true`
(it is currently `false`), so this will let a user know they should be
explicit about setting this to ensure the behavior does not change in a
future (breaking) release.

Relates to #86014
This commit is contained in:
Lee Hinman 2022-10-03 08:34:40 -06:00 committed by GitHub
parent 1bb34e0982
commit 4fe9fc488c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 109 additions and 15 deletions

10
docs/changelog/90460.yaml Normal file
View file

@ -0,0 +1,10 @@
pr: 90460
summary: Deprecate 'remove_binary' default of false for ingest attachment processor
area: Ingest Node
type: deprecation
issues: []
deprecation:
title: Deprecate 'remove_binary' default of false for ingest attachment processor
area: CRUD
details: The default "remove_binary" option for the attachment processor will be changed from false to true in a later Elasticsearch release. This means that the binary file sent to Elasticsearch will not be retained.
impact: Users should update the "remove_binary" option to be explicitly true or false, instead of relying on the default value, so that no default value changes will affect Elasticsearch.

View file

@ -57,7 +57,8 @@ PUT _ingest/pipeline/attachment
"processors" : [ "processors" : [
{ {
"attachment" : { "attachment" : {
"field" : "data" "field" : "data",
"remove_binary": false
} }
} }
] ]
@ -141,7 +142,8 @@ PUT _ingest/pipeline/attachment
{ {
"attachment" : { "attachment" : {
"field" : "data", "field" : "data",
"properties": [ "content", "title" ] "properties": [ "content", "title" ],
"remove_binary": false
} }
} }
] ]
@ -167,7 +169,8 @@ PUT _ingest/pipeline/cbor-attachment
"processors" : [ "processors" : [
{ {
"attachment" : { "attachment" : {
"field" : "data" "field" : "data",
"remove_binary": false
} }
} }
] ]
@ -222,7 +225,8 @@ PUT _ingest/pipeline/attachment
"attachment" : { "attachment" : {
"field" : "data", "field" : "data",
"indexed_chars" : 11, "indexed_chars" : 11,
"indexed_chars_field" : "max_size" "indexed_chars_field" : "max_size",
"remove_binary": false
} }
} }
] ]
@ -269,7 +273,8 @@ PUT _ingest/pipeline/attachment
"attachment" : { "attachment" : {
"field" : "data", "field" : "data",
"indexed_chars" : 11, "indexed_chars" : 11,
"indexed_chars_field" : "max_size" "indexed_chars_field" : "max_size",
"remove_binary": false
} }
} }
] ]
@ -352,7 +357,8 @@ PUT _ingest/pipeline/attachment
"processor": { "processor": {
"attachment": { "attachment": {
"target_field": "_ingest._value.attachment", "target_field": "_ingest._value.attachment",
"field": "_ingest._value.data" "field": "_ingest._value.data",
"remove_binary": false
} }
} }
} }

View file

@ -14,7 +14,10 @@ import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Office; import org.apache.tika.metadata.Office;
import org.apache.tika.metadata.TikaCoreProperties; import org.apache.tika.metadata.TikaCoreProperties;
import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.Version;
import org.elasticsearch.common.Strings; import org.elasticsearch.common.Strings;
import org.elasticsearch.common.logging.DeprecationCategory;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.ingest.AbstractProcessor; import org.elasticsearch.ingest.AbstractProcessor;
import org.elasticsearch.ingest.IngestDocument; import org.elasticsearch.ingest.IngestDocument;
import org.elasticsearch.ingest.Processor; import org.elasticsearch.ingest.Processor;
@ -30,16 +33,18 @@ import java.util.Set;
import static org.elasticsearch.ingest.ConfigurationUtils.newConfigurationException; import static org.elasticsearch.ingest.ConfigurationUtils.newConfigurationException;
import static org.elasticsearch.ingest.ConfigurationUtils.readBooleanProperty; import static org.elasticsearch.ingest.ConfigurationUtils.readBooleanProperty;
import static org.elasticsearch.ingest.ConfigurationUtils.readIntProperty; import static org.elasticsearch.ingest.ConfigurationUtils.readIntProperty;
import static org.elasticsearch.ingest.ConfigurationUtils.readOptionalBooleanProperty;
import static org.elasticsearch.ingest.ConfigurationUtils.readOptionalList; import static org.elasticsearch.ingest.ConfigurationUtils.readOptionalList;
import static org.elasticsearch.ingest.ConfigurationUtils.readOptionalStringProperty; import static org.elasticsearch.ingest.ConfigurationUtils.readOptionalStringProperty;
import static org.elasticsearch.ingest.ConfigurationUtils.readStringProperty; import static org.elasticsearch.ingest.ConfigurationUtils.readStringProperty;
public final class AttachmentProcessor extends AbstractProcessor { public final class AttachmentProcessor extends AbstractProcessor {
public static final String TYPE = "attachment"; private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(AttachmentProcessor.class);
private static final int NUMBER_OF_CHARS_INDEXED = 100000; private static final int NUMBER_OF_CHARS_INDEXED = 100000;
public static final String TYPE = "attachment";
private final String field; private final String field;
private final String targetField; private final String targetField;
private final Set<Property> properties; private final Set<Property> properties;
@ -221,6 +226,15 @@ public final class AttachmentProcessor extends AbstractProcessor {
static final Set<Property> DEFAULT_PROPERTIES = EnumSet.allOf(Property.class); static final Set<Property> DEFAULT_PROPERTIES = EnumSet.allOf(Property.class);
static {
if (Version.CURRENT.major >= 9) {
throw new IllegalStateException(
"[poison pill] update the [remove_binary] default to be 'true' assuming "
+ "enough time has passed. Deprecated in September 2022."
);
}
}
@Override @Override
public AttachmentProcessor create( public AttachmentProcessor create(
Map<String, Processor.Factory> registry, Map<String, Processor.Factory> registry,
@ -235,7 +249,17 @@ public final class AttachmentProcessor extends AbstractProcessor {
int indexedChars = readIntProperty(TYPE, processorTag, config, "indexed_chars", NUMBER_OF_CHARS_INDEXED); int indexedChars = readIntProperty(TYPE, processorTag, config, "indexed_chars", NUMBER_OF_CHARS_INDEXED);
boolean ignoreMissing = readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false); boolean ignoreMissing = readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false);
String indexedCharsField = readOptionalStringProperty(TYPE, processorTag, config, "indexed_chars_field"); String indexedCharsField = readOptionalStringProperty(TYPE, processorTag, config, "indexed_chars_field");
boolean removeBinary = readBooleanProperty(TYPE, processorTag, config, "remove_binary", false); Boolean removeBinary = readOptionalBooleanProperty(TYPE, processorTag, config, "remove_binary");
if (removeBinary == null) {
DEPRECATION_LOGGER.warn(
DeprecationCategory.PARSING,
"attachment-remove-binary",
"The default [remove_binary] value of 'false' is deprecated and will be "
+ "set to 'true' in a future release. Set [remove_binary] explicitly to "
+ "'true' or 'false' to ensure no behavior change."
);
removeBinary = false;
}
final Set<Property> properties; final Set<Property> properties;
if (propertyNames != null) { if (propertyNames != null) {

View file

@ -41,6 +41,12 @@ public class AttachmentProcessorFactoryTests extends ESTestCase {
assertThat(processor.getTargetField(), equalTo("attachment")); assertThat(processor.getTargetField(), equalTo("attachment"));
assertThat(processor.getProperties(), sameInstance(AttachmentProcessor.Factory.DEFAULT_PROPERTIES)); assertThat(processor.getProperties(), sameInstance(AttachmentProcessor.Factory.DEFAULT_PROPERTIES));
assertFalse(processor.isIgnoreMissing()); assertFalse(processor.isIgnoreMissing());
assertWarnings(
"The default [remove_binary] value of 'false' is deprecated "
+ "and will be set to 'true' in a future release. Set [remove_binary] explicitly to 'true'"
+ " or 'false' to ensure no behavior change."
);
} }
public void testConfigureIndexedChars() throws Exception { public void testConfigureIndexedChars() throws Exception {
@ -54,6 +60,12 @@ public class AttachmentProcessorFactoryTests extends ESTestCase {
assertThat(processor.getTag(), equalTo(processorTag)); assertThat(processor.getTag(), equalTo(processorTag));
assertThat(processor.getIndexedChars(), is(indexedChars)); assertThat(processor.getIndexedChars(), is(indexedChars));
assertFalse(processor.isIgnoreMissing()); assertFalse(processor.isIgnoreMissing());
assertWarnings(
"The default [remove_binary] value of 'false' is deprecated "
+ "and will be set to 'true' in a future release. Set [remove_binary] explicitly to 'true'"
+ " or 'false' to ensure no behavior change."
);
} }
public void testBuildTargetField() throws Exception { public void testBuildTargetField() throws Exception {
@ -64,6 +76,12 @@ public class AttachmentProcessorFactoryTests extends ESTestCase {
assertThat(processor.getField(), equalTo("_field")); assertThat(processor.getField(), equalTo("_field"));
assertThat(processor.getTargetField(), equalTo("_field")); assertThat(processor.getTargetField(), equalTo("_field"));
assertFalse(processor.isIgnoreMissing()); assertFalse(processor.isIgnoreMissing());
assertWarnings(
"The default [remove_binary] value of 'false' is deprecated "
+ "and will be set to 'true' in a future release. Set [remove_binary] explicitly to 'true'"
+ " or 'false' to ensure no behavior change."
);
} }
public void testBuildFields() throws Exception { public void testBuildFields() throws Exception {
@ -82,6 +100,12 @@ public class AttachmentProcessorFactoryTests extends ESTestCase {
assertThat(processor.getField(), equalTo("_field")); assertThat(processor.getField(), equalTo("_field"));
assertThat(processor.getProperties(), equalTo(properties)); assertThat(processor.getProperties(), equalTo(properties));
assertFalse(processor.isIgnoreMissing()); assertFalse(processor.isIgnoreMissing());
assertWarnings(
"The default [remove_binary] value of 'false' is deprecated "
+ "and will be set to 'true' in a future release. Set [remove_binary] explicitly to 'true'"
+ " or 'false' to ensure no behavior change."
);
} }
public void testBuildIllegalFieldOption() throws Exception { public void testBuildIllegalFieldOption() throws Exception {
@ -108,6 +132,12 @@ public class AttachmentProcessorFactoryTests extends ESTestCase {
} catch (ElasticsearchParseException e) { } catch (ElasticsearchParseException e) {
assertThat(e.getMessage(), equalTo("[properties] property isn't a list, but of type [java.lang.String]")); assertThat(e.getMessage(), equalTo("[properties] property isn't a list, but of type [java.lang.String]"));
} }
assertWarnings(
"The default [remove_binary] value of 'false' is deprecated "
+ "and will be set to 'true' in a future release. Set [remove_binary] explicitly to 'true'"
+ " or 'false' to ensure no behavior change."
);
} }
public void testIgnoreMissing() throws Exception { public void testIgnoreMissing() throws Exception {
@ -123,6 +153,12 @@ public class AttachmentProcessorFactoryTests extends ESTestCase {
assertThat(processor.getTargetField(), equalTo("attachment")); assertThat(processor.getTargetField(), equalTo("attachment"));
assertThat(processor.getProperties(), sameInstance(AttachmentProcessor.Factory.DEFAULT_PROPERTIES)); assertThat(processor.getProperties(), sameInstance(AttachmentProcessor.Factory.DEFAULT_PROPERTIES));
assertTrue(processor.isIgnoreMissing()); assertTrue(processor.isIgnoreMissing());
assertWarnings(
"The default [remove_binary] value of 'false' is deprecated "
+ "and will be set to 'true' in a future release. Set [remove_binary] explicitly to 'true'"
+ " or 'false' to ensure no behavior change."
);
} }
public void testRemoveBinary() throws Exception { public void testRemoveBinary() throws Exception {

View file

@ -9,7 +9,8 @@
"processors": [ "processors": [
{ {
"attachment" : { "attachment" : {
"field" : "field1" "field" : "field1",
"remove_binary": false
} }
} }
] ]
@ -50,7 +51,8 @@
{ {
"attachment" : { "attachment" : {
"field" : "field1", "field" : "field1",
"properties" : ["language"] "properties" : ["language"],
"remove_binary": false
} }
} }
] ]
@ -84,7 +86,8 @@
{ {
"attachment" : { "attachment" : {
"field" : "field1", "field" : "field1",
"indexed_chars": 30 "indexed_chars": 30,
"remove_binary": true
} }
} }
] ]
@ -120,7 +123,8 @@
"attachment" : { "attachment" : {
"field" : "field1", "field" : "field1",
"indexed_chars": 30, "indexed_chars": 30,
"indexed_chars_field": "max_size" "indexed_chars_field": "max_size",
"remove_binary": true
} }
} }
] ]

View file

@ -12,7 +12,8 @@
"processors": [ "processors": [
{ {
"attachment" : { "attachment" : {
"field" : "field1" "field" : "field1",
"remove_binary": true
} }
} }
] ]
@ -55,7 +56,8 @@
"processors": [ "processors": [
{ {
"attachment" : { "attachment" : {
"field" : "field1" "field" : "field1",
"remove_binary": true
} }
} }
] ]

View file

@ -14,6 +14,7 @@ import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.util.Maps; import org.elasticsearch.common.util.Maps;
import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.script.Script; import org.elasticsearch.script.Script;
import org.elasticsearch.script.ScriptService; import org.elasticsearch.script.ScriptService;
import org.elasticsearch.script.ScriptType; import org.elasticsearch.script.ScriptType;
@ -216,6 +217,17 @@ public final class ConfigurationUtils {
} }
} }
@Nullable
public static Boolean readOptionalBooleanProperty(
String processorType,
String processorTag,
Map<String, Object> configuration,
String propertyName
) {
Object value = configuration.remove(propertyName);
return readBoolean(processorType, processorTag, propertyName, value);
}
private static Boolean readBoolean(String processorType, String processorTag, String propertyName, Object value) { private static Boolean readBoolean(String processorType, String processorTag, String propertyName, Object value) {
if (value == null) { if (value == null) {
return null; return null;