From 33af83a0ca3892e62d033cf8fb710e17374719dc Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Tue, 10 Jun 2025 16:32:47 +0200 Subject: [PATCH] Synthetic source: avoid storing multi fields of type text and match_only_text by default. (#129126) Don't store text and match_only_text field by default when source mode is synthetic and a field is a multi field or when there is a suitable multi field. Without this change, ES would store field otherwise twice in a multi-field configuration. For example: ``` ... "os": { "properties": { "name": { "ignore_above": 1024, "type": "keyword", "fields": { "text": { "type": "match_only_text" } } } ... ``` In this case, two stored fields were added, one in case for the `name` field and one for `name.text` multi-field. This change prevents this, and would never store a stored field when text or match_only_text field is a multi-field. --- docs/changelog/129126.yaml | 6 ++ .../extras/MatchOnlyTextFieldMapper.java | 34 +++---- .../extras/MatchOnlyTextFieldMapperTests.java | 90 +++++++++++++++++++ .../elasticsearch/index/IndexVersions.java | 1 + .../index/mapper/TextFieldMapper.java | 39 ++++++-- .../index/mapper/TextFieldMapperTests.java | 67 ++++++++++++++ 6 files changed, 214 insertions(+), 23 deletions(-) create mode 100644 docs/changelog/129126.yaml diff --git a/docs/changelog/129126.yaml b/docs/changelog/129126.yaml new file mode 100644 index 000000000000..b719af9892ba --- /dev/null +++ b/docs/changelog/129126.yaml @@ -0,0 +1,6 @@ +pr: 129126 +summary: "Synthetic source: avoid storing multi fields of type text and `match_only_text`\ + \ by default" +area: Mapping +type: bug +issues: [] diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java index 055f6091ac48..1f799cc6d3d4 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java @@ -33,6 +33,7 @@ import org.elasticsearch.common.CheckedIntFunction; import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.analysis.IndexAnalyzers; import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.fielddata.FieldDataContext; @@ -101,12 +102,9 @@ public class MatchOnlyTextFieldMapper extends FieldMapper { private final Parameter> meta = Parameter.metaParam(); private final TextParams.Analyzers analyzers; + private final boolean withinMultiField; - public Builder(String name, IndexAnalyzers indexAnalyzers) { - this(name, IndexVersion.current(), indexAnalyzers); - } - - public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers) { + public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers, boolean withinMultiField) { super(name); this.indexCreatedVersion = indexCreatedVersion; this.analyzers = new TextParams.Analyzers( @@ -115,6 +113,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper { m -> ((MatchOnlyTextFieldMapper) m).positionIncrementGap, indexCreatedVersion ); + this.withinMultiField = withinMultiField; } @Override @@ -140,18 +139,21 @@ public class MatchOnlyTextFieldMapper extends FieldMapper { @Override public MatchOnlyTextFieldMapper build(MapperBuilderContext context) { MatchOnlyTextFieldType tft = buildFieldType(context); - return new MatchOnlyTextFieldMapper( - leafName(), - Defaults.FIELD_TYPE, - tft, - builderParams(this, context), - context.isSourceSynthetic(), - this - ); + final boolean storeSource; + if (indexCreatedVersion.onOrAfter(IndexVersions.MAPPER_TEXT_MATCH_ONLY_MULTI_FIELDS_DEFAULT_NOT_STORED)) { + storeSource = context.isSourceSynthetic() + && withinMultiField == false + && multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false; + } else { + storeSource = context.isSourceSynthetic(); + } + return new MatchOnlyTextFieldMapper(leafName(), Defaults.FIELD_TYPE, tft, builderParams(this, context), storeSource, this); } } - public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers())); + public static final TypeParser PARSER = new TypeParser( + (n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers(), c.isWithinMultiField()) + ); public static class MatchOnlyTextFieldType extends StringFieldType { @@ -406,6 +408,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper { private final int positionIncrementGap; private final boolean storeSource; private final FieldType fieldType; + private final boolean withinMultiField; private MatchOnlyTextFieldMapper( String simpleName, @@ -424,6 +427,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper { this.indexAnalyzer = builder.analyzers.getIndexAnalyzer(); this.positionIncrementGap = builder.analyzers.positionIncrementGap.getValue(); this.storeSource = storeSource; + this.withinMultiField = builder.withinMultiField; } @Override @@ -433,7 +437,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper { @Override public FieldMapper.Builder getMergeBuilder() { - return new Builder(leafName(), indexCreatedVersion, indexAnalyzers).init(this); + return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, withinMultiField).init(this); } @Override diff --git a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java index b913edb1f279..22841f8c42bf 100644 --- a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java +++ b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java @@ -23,6 +23,7 @@ import org.apache.lucene.tests.analysis.Token; import org.apache.lucene.tests.index.RandomIndexWriter; import org.elasticsearch.common.Strings; import org.elasticsearch.core.Tuple; +import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.LuceneDocument; @@ -46,8 +47,10 @@ import java.util.List; import java.util.stream.Collectors; import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.core.Is.is; public class MatchOnlyTextFieldMapperTests extends MapperTestCase { @@ -255,4 +258,91 @@ public class MatchOnlyTextFieldMapperTests extends MapperTestCase { protected IngestScriptSupport ingestScriptSupport() { throw new AssumptionViolatedException("not supported"); } + + public void testStoreParameterDefaultsSyntheticSource() throws IOException { + var indexSettingsBuilder = getIndexSettingsBuilder(); + indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic"); + var indexSettings = indexSettingsBuilder.build(); + + var mapping = mapping(b -> { + b.startObject("name"); + b.field("type", "match_only_text"); + b.endObject(); + }); + DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper(); + + var source = source(b -> b.field("name", "quick brown fox")); + ParsedDocument doc = mapper.parse(source); + + { + List fields = doc.rootDoc().getFields("name"); + IndexableFieldType fieldType = fields.get(0).fieldType(); + assertThat(fieldType.stored(), is(false)); + } + { + List fields = doc.rootDoc().getFields("name._original"); + IndexableFieldType fieldType = fields.get(0).fieldType(); + assertThat(fieldType.stored(), is(true)); + } + } + + public void testStoreParameterDefaultsSyntheticSourceWithKeywordMultiField() throws IOException { + var indexSettingsBuilder = getIndexSettingsBuilder(); + indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic"); + var indexSettings = indexSettingsBuilder.build(); + + var mapping = mapping(b -> { + b.startObject("name"); + b.field("type", "match_only_text"); + b.startObject("fields"); + b.startObject("keyword"); + b.field("type", "keyword"); + b.endObject(); + b.endObject(); + b.endObject(); + }); + DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper(); + + var source = source(b -> b.field("name", "quick brown fox")); + ParsedDocument doc = mapper.parse(source); + { + List fields = doc.rootDoc().getFields("name"); + IndexableFieldType fieldType = fields.get(0).fieldType(); + assertThat(fieldType.stored(), is(false)); + } + { + List fields = doc.rootDoc().getFields("name._original"); + assertThat(fields, empty()); + } + } + + public void testStoreParameterDefaultsSyntheticSourceTextFieldIsMultiField() throws IOException { + var indexSettingsBuilder = getIndexSettingsBuilder(); + indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic"); + var indexSettings = indexSettingsBuilder.build(); + + var mapping = mapping(b -> { + b.startObject("name"); + b.field("type", "keyword"); + b.startObject("fields"); + b.startObject("text"); + b.field("type", "match_only_text"); + b.endObject(); + b.endObject(); + b.endObject(); + }); + DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper(); + + var source = source(b -> b.field("name", "quick brown fox")); + ParsedDocument doc = mapper.parse(source); + { + List fields = doc.rootDoc().getFields("name.text"); + IndexableFieldType fieldType = fields.get(0).fieldType(); + assertThat(fieldType.stored(), is(false)); + } + { + List fields = doc.rootDoc().getFields("name.text._original"); + assertThat(fields, empty()); + } + } } diff --git a/server/src/main/java/org/elasticsearch/index/IndexVersions.java b/server/src/main/java/org/elasticsearch/index/IndexVersions.java index f32d4d7a2a30..970b63081225 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexVersions.java +++ b/server/src/main/java/org/elasticsearch/index/IndexVersions.java @@ -171,6 +171,7 @@ public class IndexVersions { public static final IndexVersion DEFAULT_TO_ACORN_HNSW_FILTER_HEURISTIC = def(9_026_0_00, Version.LUCENE_10_2_1); public static final IndexVersion SEQ_NO_WITHOUT_POINTS = def(9_027_0_00, Version.LUCENE_10_2_1); public static final IndexVersion INDEX_INT_SORT_INT_TYPE = def(9_028_0_00, Version.LUCENE_10_2_1); + public static final IndexVersion MAPPER_TEXT_MATCH_ONLY_MULTI_FIELDS_DEFAULT_NOT_STORED = def(9_029_0_00, Version.LUCENE_10_2_1); /* * STOP! READ THIS FIRST! No, really, diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index 08cef586e143..0e49506cd92e 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -287,11 +287,19 @@ public final class TextFieldMapper extends FieldMapper { final TextParams.Analyzers analyzers; + private final boolean withinMultiField; + public Builder(String name, IndexAnalyzers indexAnalyzers, boolean isSyntheticSourceEnabled) { - this(name, IndexVersion.current(), indexAnalyzers, isSyntheticSourceEnabled); + this(name, IndexVersion.current(), indexAnalyzers, isSyntheticSourceEnabled, false); } - public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers, boolean isSyntheticSourceEnabled) { + public Builder( + String name, + IndexVersion indexCreatedVersion, + IndexAnalyzers indexAnalyzers, + boolean isSyntheticSourceEnabled, + boolean withinMultiField + ) { super(name); // If synthetic source is used we need to either store this field @@ -300,10 +308,17 @@ public final class TextFieldMapper extends FieldMapper { // storing the field without requiring users to explicitly set 'store'. // // If 'store' parameter was explicitly provided we'll reject the request. - this.store = Parameter.storeParam( - m -> ((TextFieldMapper) m).store, - () -> isSyntheticSourceEnabled && multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false - ); + // Note that if current builder is a multi field, then we don't need to store, given that responsibility lies with parent field + this.withinMultiField = withinMultiField; + this.store = Parameter.storeParam(m -> ((TextFieldMapper) m).store, () -> { + if (indexCreatedVersion.onOrAfter(IndexVersions.MAPPER_TEXT_MATCH_ONLY_MULTI_FIELDS_DEFAULT_NOT_STORED)) { + return isSyntheticSourceEnabled + && this.withinMultiField == false + && multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false; + } else { + return isSyntheticSourceEnabled; + } + }); this.indexCreatedVersion = indexCreatedVersion; this.analyzers = new TextParams.Analyzers( indexAnalyzers, @@ -482,7 +497,13 @@ public final class TextFieldMapper extends FieldMapper { } public static final TypeParser PARSER = createTypeParserWithLegacySupport( - (n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers(), SourceFieldMapper.isSynthetic(c.getIndexSettings())) + (n, c) -> new Builder( + n, + c.indexVersionCreated(), + c.getIndexAnalyzers(), + SourceFieldMapper.isSynthetic(c.getIndexSettings()), + c.isWithinMultiField() + ) ); private static class PhraseWrappedAnalyzer extends AnalyzerWrapper { @@ -1304,6 +1325,7 @@ public final class TextFieldMapper extends FieldMapper { private final SubFieldInfo phraseFieldInfo; private final boolean isSyntheticSourceEnabled; + private final boolean isWithinMultiField; private TextFieldMapper( String simpleName, @@ -1337,6 +1359,7 @@ public final class TextFieldMapper extends FieldMapper { this.freqFilter = builder.freqFilter.getValue(); this.fieldData = builder.fieldData.get(); this.isSyntheticSourceEnabled = builder.isSyntheticSourceEnabled; + this.isWithinMultiField = builder.withinMultiField; } @Override @@ -1360,7 +1383,7 @@ public final class TextFieldMapper extends FieldMapper { @Override public FieldMapper.Builder getMergeBuilder() { - return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, isSyntheticSourceEnabled).init(this); + return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, isSyntheticSourceEnabled, isWithinMultiField).init(this); } @Override diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java index 4670738c1d21..fa2fce306ff8 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java @@ -307,6 +307,73 @@ public class TextFieldMapperTests extends MapperTestCase { } } + public void testStoreParameterDefaultsSyntheticSource() throws IOException { + var indexSettingsBuilder = getIndexSettingsBuilder(); + indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic"); + var indexSettings = indexSettingsBuilder.build(); + + var mapping = mapping(b -> { + b.startObject("name"); + b.field("type", "text"); + b.endObject(); + }); + DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper(); + + var source = source(b -> b.field("name", "quick brown fox")); + ParsedDocument doc = mapper.parse(source); + List fields = doc.rootDoc().getFields("name"); + IndexableFieldType fieldType = fields.get(0).fieldType(); + assertThat(fieldType.stored(), is(true)); + } + + public void testStoreParameterDefaultsSyntheticSourceWithKeywordMultiField() throws IOException { + var indexSettingsBuilder = getIndexSettingsBuilder(); + indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic"); + var indexSettings = indexSettingsBuilder.build(); + + var mapping = mapping(b -> { + b.startObject("name"); + b.field("type", "text"); + b.startObject("fields"); + b.startObject("keyword"); + b.field("type", "keyword"); + b.endObject(); + b.endObject(); + b.endObject(); + }); + DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper(); + + var source = source(b -> b.field("name", "quick brown fox")); + ParsedDocument doc = mapper.parse(source); + List fields = doc.rootDoc().getFields("name"); + IndexableFieldType fieldType = fields.get(0).fieldType(); + assertThat(fieldType.stored(), is(false)); + } + + public void testStoreParameterDefaultsSyntheticSourceTextFieldIsMultiField() throws IOException { + var indexSettingsBuilder = getIndexSettingsBuilder(); + indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic"); + var indexSettings = indexSettingsBuilder.build(); + + var mapping = mapping(b -> { + b.startObject("name"); + b.field("type", "keyword"); + b.startObject("fields"); + b.startObject("text"); + b.field("type", "text"); + b.endObject(); + b.endObject(); + b.endObject(); + }); + DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper(); + + var source = source(b -> b.field("name", "quick brown fox")); + ParsedDocument doc = mapper.parse(source); + List fields = doc.rootDoc().getFields("name.text"); + IndexableFieldType fieldType = fields.get(0).fieldType(); + assertThat(fieldType.stored(), is(false)); + } + public void testBWCSerialization() throws IOException { MapperService mapperService = createMapperService(fieldMapping(b -> { b.field("type", "text");