Deprecate 'remove_binary' default of false for ingest attachment processor (#90460)

This commit adds deprecation warning for when the `remove_binary`
setting is unset. In the future we want to change the default to `true`
(it is currently `false`), so this will let a user know they should be
explicit about setting this to ensure the behavior does not change in a
future (breaking) release.

Relates to #86014
This commit is contained in:
Lee Hinman 2022-10-03 08:34:40 -06:00 committed by GitHub
parent 1bb34e0982
commit 4fe9fc488c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 109 additions and 15 deletions

10
docs/changelog/90460.yaml Normal file
View file

@ -0,0 +1,10 @@
pr: 90460
summary: Deprecate 'remove_binary' default of false for ingest attachment processor
area: Ingest Node
type: deprecation
issues: []
deprecation:
title: Deprecate 'remove_binary' default of false for ingest attachment processor
area: CRUD
details: The default "remove_binary" option for the attachment processor will be changed from false to true in a later Elasticsearch release. This means that the binary file sent to Elasticsearch will not be retained.
impact: Users should update the "remove_binary" option to be explicitly true or false, instead of relying on the default value, so that no default value changes will affect Elasticsearch.

View file

@ -57,7 +57,8 @@ PUT _ingest/pipeline/attachment
"processors" : [
{
"attachment" : {
"field" : "data"
"field" : "data",
"remove_binary": false
}
}
]
@ -141,7 +142,8 @@ PUT _ingest/pipeline/attachment
{
"attachment" : {
"field" : "data",
"properties": [ "content", "title" ]
"properties": [ "content", "title" ],
"remove_binary": false
}
}
]
@ -167,7 +169,8 @@ PUT _ingest/pipeline/cbor-attachment
"processors" : [
{
"attachment" : {
"field" : "data"
"field" : "data",
"remove_binary": false
}
}
]
@ -222,7 +225,8 @@ PUT _ingest/pipeline/attachment
"attachment" : {
"field" : "data",
"indexed_chars" : 11,
"indexed_chars_field" : "max_size"
"indexed_chars_field" : "max_size",
"remove_binary": false
}
}
]
@ -269,7 +273,8 @@ PUT _ingest/pipeline/attachment
"attachment" : {
"field" : "data",
"indexed_chars" : 11,
"indexed_chars_field" : "max_size"
"indexed_chars_field" : "max_size",
"remove_binary": false
}
}
]
@ -352,7 +357,8 @@ PUT _ingest/pipeline/attachment
"processor": {
"attachment": {
"target_field": "_ingest._value.attachment",
"field": "_ingest._value.data"
"field": "_ingest._value.data",
"remove_binary": false
}
}
}

View file

@ -14,7 +14,10 @@ import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Office;
import org.apache.tika.metadata.TikaCoreProperties;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.Version;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.logging.DeprecationCategory;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.ingest.AbstractProcessor;
import org.elasticsearch.ingest.IngestDocument;
import org.elasticsearch.ingest.Processor;
@ -30,16 +33,18 @@ import java.util.Set;
import static org.elasticsearch.ingest.ConfigurationUtils.newConfigurationException;
import static org.elasticsearch.ingest.ConfigurationUtils.readBooleanProperty;
import static org.elasticsearch.ingest.ConfigurationUtils.readIntProperty;
import static org.elasticsearch.ingest.ConfigurationUtils.readOptionalBooleanProperty;
import static org.elasticsearch.ingest.ConfigurationUtils.readOptionalList;
import static org.elasticsearch.ingest.ConfigurationUtils.readOptionalStringProperty;
import static org.elasticsearch.ingest.ConfigurationUtils.readStringProperty;
public final class AttachmentProcessor extends AbstractProcessor {
public static final String TYPE = "attachment";
private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(AttachmentProcessor.class);
private static final int NUMBER_OF_CHARS_INDEXED = 100000;
public static final String TYPE = "attachment";
private final String field;
private final String targetField;
private final Set<Property> properties;
@ -221,6 +226,15 @@ public final class AttachmentProcessor extends AbstractProcessor {
static final Set<Property> DEFAULT_PROPERTIES = EnumSet.allOf(Property.class);
static {
if (Version.CURRENT.major >= 9) {
throw new IllegalStateException(
"[poison pill] update the [remove_binary] default to be 'true' assuming "
+ "enough time has passed. Deprecated in September 2022."
);
}
}
@Override
public AttachmentProcessor create(
Map<String, Processor.Factory> registry,
@ -235,7 +249,17 @@ public final class AttachmentProcessor extends AbstractProcessor {
int indexedChars = readIntProperty(TYPE, processorTag, config, "indexed_chars", NUMBER_OF_CHARS_INDEXED);
boolean ignoreMissing = readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false);
String indexedCharsField = readOptionalStringProperty(TYPE, processorTag, config, "indexed_chars_field");
boolean removeBinary = readBooleanProperty(TYPE, processorTag, config, "remove_binary", false);
Boolean removeBinary = readOptionalBooleanProperty(TYPE, processorTag, config, "remove_binary");
if (removeBinary == null) {
DEPRECATION_LOGGER.warn(
DeprecationCategory.PARSING,
"attachment-remove-binary",
"The default [remove_binary] value of 'false' is deprecated and will be "
+ "set to 'true' in a future release. Set [remove_binary] explicitly to "
+ "'true' or 'false' to ensure no behavior change."
);
removeBinary = false;
}
final Set<Property> properties;
if (propertyNames != null) {

View file

@ -41,6 +41,12 @@ public class AttachmentProcessorFactoryTests extends ESTestCase {
assertThat(processor.getTargetField(), equalTo("attachment"));
assertThat(processor.getProperties(), sameInstance(AttachmentProcessor.Factory.DEFAULT_PROPERTIES));
assertFalse(processor.isIgnoreMissing());
assertWarnings(
"The default [remove_binary] value of 'false' is deprecated "
+ "and will be set to 'true' in a future release. Set [remove_binary] explicitly to 'true'"
+ " or 'false' to ensure no behavior change."
);
}
public void testConfigureIndexedChars() throws Exception {
@ -54,6 +60,12 @@ public class AttachmentProcessorFactoryTests extends ESTestCase {
assertThat(processor.getTag(), equalTo(processorTag));
assertThat(processor.getIndexedChars(), is(indexedChars));
assertFalse(processor.isIgnoreMissing());
assertWarnings(
"The default [remove_binary] value of 'false' is deprecated "
+ "and will be set to 'true' in a future release. Set [remove_binary] explicitly to 'true'"
+ " or 'false' to ensure no behavior change."
);
}
public void testBuildTargetField() throws Exception {
@ -64,6 +76,12 @@ public class AttachmentProcessorFactoryTests extends ESTestCase {
assertThat(processor.getField(), equalTo("_field"));
assertThat(processor.getTargetField(), equalTo("_field"));
assertFalse(processor.isIgnoreMissing());
assertWarnings(
"The default [remove_binary] value of 'false' is deprecated "
+ "and will be set to 'true' in a future release. Set [remove_binary] explicitly to 'true'"
+ " or 'false' to ensure no behavior change."
);
}
public void testBuildFields() throws Exception {
@ -82,6 +100,12 @@ public class AttachmentProcessorFactoryTests extends ESTestCase {
assertThat(processor.getField(), equalTo("_field"));
assertThat(processor.getProperties(), equalTo(properties));
assertFalse(processor.isIgnoreMissing());
assertWarnings(
"The default [remove_binary] value of 'false' is deprecated "
+ "and will be set to 'true' in a future release. Set [remove_binary] explicitly to 'true'"
+ " or 'false' to ensure no behavior change."
);
}
public void testBuildIllegalFieldOption() throws Exception {
@ -108,6 +132,12 @@ public class AttachmentProcessorFactoryTests extends ESTestCase {
} catch (ElasticsearchParseException e) {
assertThat(e.getMessage(), equalTo("[properties] property isn't a list, but of type [java.lang.String]"));
}
assertWarnings(
"The default [remove_binary] value of 'false' is deprecated "
+ "and will be set to 'true' in a future release. Set [remove_binary] explicitly to 'true'"
+ " or 'false' to ensure no behavior change."
);
}
public void testIgnoreMissing() throws Exception {
@ -123,6 +153,12 @@ public class AttachmentProcessorFactoryTests extends ESTestCase {
assertThat(processor.getTargetField(), equalTo("attachment"));
assertThat(processor.getProperties(), sameInstance(AttachmentProcessor.Factory.DEFAULT_PROPERTIES));
assertTrue(processor.isIgnoreMissing());
assertWarnings(
"The default [remove_binary] value of 'false' is deprecated "
+ "and will be set to 'true' in a future release. Set [remove_binary] explicitly to 'true'"
+ " or 'false' to ensure no behavior change."
);
}
public void testRemoveBinary() throws Exception {

View file

@ -9,7 +9,8 @@
"processors": [
{
"attachment" : {
"field" : "field1"
"field" : "field1",
"remove_binary": false
}
}
]
@ -50,7 +51,8 @@
{
"attachment" : {
"field" : "field1",
"properties" : ["language"]
"properties" : ["language"],
"remove_binary": false
}
}
]
@ -84,7 +86,8 @@
{
"attachment" : {
"field" : "field1",
"indexed_chars": 30
"indexed_chars": 30,
"remove_binary": true
}
}
]
@ -120,7 +123,8 @@
"attachment" : {
"field" : "field1",
"indexed_chars": 30,
"indexed_chars_field": "max_size"
"indexed_chars_field": "max_size",
"remove_binary": true
}
}
]

View file

@ -12,7 +12,8 @@
"processors": [
{
"attachment" : {
"field" : "field1"
"field" : "field1",
"remove_binary": true
}
}
]
@ -55,7 +56,8 @@
"processors": [
{
"attachment" : {
"field" : "field1"
"field" : "field1",
"remove_binary": true
}
}
]

View file

@ -14,6 +14,7 @@ import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.util.Maps;
import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.script.Script;
import org.elasticsearch.script.ScriptService;
import org.elasticsearch.script.ScriptType;
@ -216,6 +217,17 @@ public final class ConfigurationUtils {
}
}
@Nullable
public static Boolean readOptionalBooleanProperty(
String processorType,
String processorTag,
Map<String, Object> configuration,
String propertyName
) {
Object value = configuration.remove(propertyName);
return readBoolean(processorType, processorTag, propertyName, value);
}
private static Boolean readBoolean(String processorType, String processorTag, String propertyName, Object value) {
if (value == null) {
return null;