Synthetic source: avoid storing multi fields of type text and match_only_text by default. (#129126)

Don't store `text` and `match_only_text` fields by default when the source mode is synthetic and either the field is itself a multi-field, or the field has a suitable multi-field (a keyword sub-field that is compatible with synthetic source).

Without this change, ES would store the field's value twice in a multi-field configuration.

For example:

```
...
"os": {
  "properties": {
    "name": {
      "ignore_above": 1024,
      "type": "keyword",
      "fields": {
        "text": {
          "type": "match_only_text"
        }
      }
    }
...
```

In this case, two stored fields were added: one for the `name` field and one for the `name.text` multi-field.
This change prevents that: a stored field is never added for a `text` or `match_only_text` field that is a multi-field.
This commit is contained in:
Martijn van Groningen 2025-06-10 16:32:47 +02:00 committed by GitHub
parent 42dec5b41f
commit 33af83a0ca
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 214 additions and 23 deletions

View file

@ -0,0 +1,6 @@
pr: 129126
summary: "Synthetic source: avoid storing multi fields of type text and `match_only_text`\
\ by default"
area: Mapping
type: bug
issues: []

View file

@ -33,6 +33,7 @@ import org.elasticsearch.common.CheckedIntFunction;
import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.IndexVersions;
import org.elasticsearch.index.analysis.IndexAnalyzers; import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.fielddata.FieldDataContext; import org.elasticsearch.index.fielddata.FieldDataContext;
@ -101,12 +102,9 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
private final Parameter<Map<String, String>> meta = Parameter.metaParam(); private final Parameter<Map<String, String>> meta = Parameter.metaParam();
private final TextParams.Analyzers analyzers; private final TextParams.Analyzers analyzers;
private final boolean withinMultiField;
public Builder(String name, IndexAnalyzers indexAnalyzers) { public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers, boolean withinMultiField) {
this(name, IndexVersion.current(), indexAnalyzers);
}
public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers) {
super(name); super(name);
this.indexCreatedVersion = indexCreatedVersion; this.indexCreatedVersion = indexCreatedVersion;
this.analyzers = new TextParams.Analyzers( this.analyzers = new TextParams.Analyzers(
@ -115,6 +113,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
m -> ((MatchOnlyTextFieldMapper) m).positionIncrementGap, m -> ((MatchOnlyTextFieldMapper) m).positionIncrementGap,
indexCreatedVersion indexCreatedVersion
); );
this.withinMultiField = withinMultiField;
} }
@Override @Override
@ -140,18 +139,21 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
@Override @Override
public MatchOnlyTextFieldMapper build(MapperBuilderContext context) { public MatchOnlyTextFieldMapper build(MapperBuilderContext context) {
MatchOnlyTextFieldType tft = buildFieldType(context); MatchOnlyTextFieldType tft = buildFieldType(context);
return new MatchOnlyTextFieldMapper( final boolean storeSource;
leafName(), if (indexCreatedVersion.onOrAfter(IndexVersions.MAPPER_TEXT_MATCH_ONLY_MULTI_FIELDS_DEFAULT_NOT_STORED)) {
Defaults.FIELD_TYPE, storeSource = context.isSourceSynthetic()
tft, && withinMultiField == false
builderParams(this, context), && multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false;
context.isSourceSynthetic(), } else {
this storeSource = context.isSourceSynthetic();
); }
return new MatchOnlyTextFieldMapper(leafName(), Defaults.FIELD_TYPE, tft, builderParams(this, context), storeSource, this);
} }
} }
public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers())); public static final TypeParser PARSER = new TypeParser(
(n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers(), c.isWithinMultiField())
);
public static class MatchOnlyTextFieldType extends StringFieldType { public static class MatchOnlyTextFieldType extends StringFieldType {
@ -406,6 +408,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
private final int positionIncrementGap; private final int positionIncrementGap;
private final boolean storeSource; private final boolean storeSource;
private final FieldType fieldType; private final FieldType fieldType;
private final boolean withinMultiField;
private MatchOnlyTextFieldMapper( private MatchOnlyTextFieldMapper(
String simpleName, String simpleName,
@ -424,6 +427,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
this.indexAnalyzer = builder.analyzers.getIndexAnalyzer(); this.indexAnalyzer = builder.analyzers.getIndexAnalyzer();
this.positionIncrementGap = builder.analyzers.positionIncrementGap.getValue(); this.positionIncrementGap = builder.analyzers.positionIncrementGap.getValue();
this.storeSource = storeSource; this.storeSource = storeSource;
this.withinMultiField = builder.withinMultiField;
} }
@Override @Override
@ -433,7 +437,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
@Override @Override
public FieldMapper.Builder getMergeBuilder() { public FieldMapper.Builder getMergeBuilder() {
return new Builder(leafName(), indexCreatedVersion, indexAnalyzers).init(this); return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, withinMultiField).init(this);
} }
@Override @Override

View file

@ -23,6 +23,7 @@ import org.apache.lucene.tests.analysis.Token;
import org.apache.lucene.tests.index.RandomIndexWriter; import org.apache.lucene.tests.index.RandomIndexWriter;
import org.elasticsearch.common.Strings; import org.elasticsearch.common.Strings;
import org.elasticsearch.core.Tuple; import org.elasticsearch.core.Tuple;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.LuceneDocument; import org.elasticsearch.index.mapper.LuceneDocument;
@ -46,8 +47,10 @@ import java.util.List;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.core.Is.is;
public class MatchOnlyTextFieldMapperTests extends MapperTestCase { public class MatchOnlyTextFieldMapperTests extends MapperTestCase {
@ -255,4 +258,91 @@ public class MatchOnlyTextFieldMapperTests extends MapperTestCase {
protected IngestScriptSupport ingestScriptSupport() { protected IngestScriptSupport ingestScriptSupport() {
throw new AssumptionViolatedException("not supported"); throw new AssumptionViolatedException("not supported");
} }
/**
 * With synthetic source enabled and no multi-fields configured, the indexed {@code name} field of a
 * {@code match_only_text} mapping is not stored, while the companion {@code name._original} field is.
 */
public void testStoreParameterDefaultsSyntheticSource() throws IOException {
    var settings = getIndexSettingsBuilder().put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic").build();
    var mappings = mapping(builder -> builder.startObject("name").field("type", "match_only_text").endObject());

    DocumentMapper documentMapper = createMapperService(settings, mappings).documentMapper();
    ParsedDocument parsed = documentMapper.parse(source(builder -> builder.field("name", "quick brown fox")));

    // The inverted-index field itself must not be stored.
    IndexableFieldType indexedType = parsed.rootDoc().getFields("name").get(0).fieldType();
    assertThat(indexedType.stored(), is(false));

    // The original value is kept in a separate stored field.
    IndexableFieldType originalType = parsed.rootDoc().getFields("name._original").get(0).fieldType();
    assertThat(originalType.stored(), is(true));
}
/**
 * With synthetic source enabled and a {@code keyword} multi-field under a {@code match_only_text}
 * field, the {@code name} field is not stored and no {@code name._original} field is added.
 */
public void testStoreParameterDefaultsSyntheticSourceWithKeywordMultiField() throws IOException {
    var settings = getIndexSettingsBuilder().put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic").build();
    var mappings = mapping(builder -> {
        builder.startObject("name").field("type", "match_only_text");
        builder.startObject("fields").startObject("keyword").field("type", "keyword").endObject().endObject();
        builder.endObject();
    });

    DocumentMapper documentMapper = createMapperService(settings, mappings).documentMapper();
    ParsedDocument parsed = documentMapper.parse(source(builder -> builder.field("name", "quick brown fox")));

    // The inverted-index field itself must not be stored.
    IndexableFieldType indexedType = parsed.rootDoc().getFields("name").get(0).fieldType();
    assertThat(indexedType.stored(), is(false));

    // No separate stored copy of the original value is expected.
    List<IndexableField> originalFields = parsed.rootDoc().getFields("name._original");
    assertThat(originalFields, empty());
}
/**
 * With synthetic source enabled and a {@code match_only_text} field declared as a multi-field of a
 * {@code keyword} field, {@code name.text} is not stored and no {@code name.text._original} field is added.
 */
public void testStoreParameterDefaultsSyntheticSourceTextFieldIsMultiField() throws IOException {
    var settings = getIndexSettingsBuilder().put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic").build();
    var mappings = mapping(builder -> {
        builder.startObject("name").field("type", "keyword");
        builder.startObject("fields").startObject("text").field("type", "match_only_text").endObject().endObject();
        builder.endObject();
    });

    DocumentMapper documentMapper = createMapperService(settings, mappings).documentMapper();
    ParsedDocument parsed = documentMapper.parse(source(builder -> builder.field("name", "quick brown fox")));

    // The multi-field's inverted-index field must not be stored.
    IndexableFieldType indexedType = parsed.rootDoc().getFields("name.text").get(0).fieldType();
    assertThat(indexedType.stored(), is(false));

    // No separate stored copy of the original value is expected for the multi-field.
    List<IndexableField> originalFields = parsed.rootDoc().getFields("name.text._original");
    assertThat(originalFields, empty());
}
} }

View file

@ -171,6 +171,7 @@ public class IndexVersions {
public static final IndexVersion DEFAULT_TO_ACORN_HNSW_FILTER_HEURISTIC = def(9_026_0_00, Version.LUCENE_10_2_1); public static final IndexVersion DEFAULT_TO_ACORN_HNSW_FILTER_HEURISTIC = def(9_026_0_00, Version.LUCENE_10_2_1);
public static final IndexVersion SEQ_NO_WITHOUT_POINTS = def(9_027_0_00, Version.LUCENE_10_2_1); public static final IndexVersion SEQ_NO_WITHOUT_POINTS = def(9_027_0_00, Version.LUCENE_10_2_1);
public static final IndexVersion INDEX_INT_SORT_INT_TYPE = def(9_028_0_00, Version.LUCENE_10_2_1); public static final IndexVersion INDEX_INT_SORT_INT_TYPE = def(9_028_0_00, Version.LUCENE_10_2_1);
public static final IndexVersion MAPPER_TEXT_MATCH_ONLY_MULTI_FIELDS_DEFAULT_NOT_STORED = def(9_029_0_00, Version.LUCENE_10_2_1);
/* /*
* STOP! READ THIS FIRST! No, really, * STOP! READ THIS FIRST! No, really,

View file

@ -287,11 +287,19 @@ public final class TextFieldMapper extends FieldMapper {
final TextParams.Analyzers analyzers; final TextParams.Analyzers analyzers;
private final boolean withinMultiField;
public Builder(String name, IndexAnalyzers indexAnalyzers, boolean isSyntheticSourceEnabled) { public Builder(String name, IndexAnalyzers indexAnalyzers, boolean isSyntheticSourceEnabled) {
this(name, IndexVersion.current(), indexAnalyzers, isSyntheticSourceEnabled); this(name, IndexVersion.current(), indexAnalyzers, isSyntheticSourceEnabled, false);
} }
public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers, boolean isSyntheticSourceEnabled) { public Builder(
String name,
IndexVersion indexCreatedVersion,
IndexAnalyzers indexAnalyzers,
boolean isSyntheticSourceEnabled,
boolean withinMultiField
) {
super(name); super(name);
// If synthetic source is used we need to either store this field // If synthetic source is used we need to either store this field
@ -300,10 +308,17 @@ public final class TextFieldMapper extends FieldMapper {
// storing the field without requiring users to explicitly set 'store'. // storing the field without requiring users to explicitly set 'store'.
// //
// If 'store' parameter was explicitly provided we'll reject the request. // If 'store' parameter was explicitly provided we'll reject the request.
this.store = Parameter.storeParam( // Note that if current builder is a multi field, then we don't need to store, given that responsibility lies with parent field
m -> ((TextFieldMapper) m).store, this.withinMultiField = withinMultiField;
() -> isSyntheticSourceEnabled && multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false this.store = Parameter.storeParam(m -> ((TextFieldMapper) m).store, () -> {
); if (indexCreatedVersion.onOrAfter(IndexVersions.MAPPER_TEXT_MATCH_ONLY_MULTI_FIELDS_DEFAULT_NOT_STORED)) {
return isSyntheticSourceEnabled
&& this.withinMultiField == false
&& multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false;
} else {
return isSyntheticSourceEnabled;
}
});
this.indexCreatedVersion = indexCreatedVersion; this.indexCreatedVersion = indexCreatedVersion;
this.analyzers = new TextParams.Analyzers( this.analyzers = new TextParams.Analyzers(
indexAnalyzers, indexAnalyzers,
@ -482,7 +497,13 @@ public final class TextFieldMapper extends FieldMapper {
} }
public static final TypeParser PARSER = createTypeParserWithLegacySupport( public static final TypeParser PARSER = createTypeParserWithLegacySupport(
(n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers(), SourceFieldMapper.isSynthetic(c.getIndexSettings())) (n, c) -> new Builder(
n,
c.indexVersionCreated(),
c.getIndexAnalyzers(),
SourceFieldMapper.isSynthetic(c.getIndexSettings()),
c.isWithinMultiField()
)
); );
private static class PhraseWrappedAnalyzer extends AnalyzerWrapper { private static class PhraseWrappedAnalyzer extends AnalyzerWrapper {
@ -1304,6 +1325,7 @@ public final class TextFieldMapper extends FieldMapper {
private final SubFieldInfo phraseFieldInfo; private final SubFieldInfo phraseFieldInfo;
private final boolean isSyntheticSourceEnabled; private final boolean isSyntheticSourceEnabled;
private final boolean isWithinMultiField;
private TextFieldMapper( private TextFieldMapper(
String simpleName, String simpleName,
@ -1337,6 +1359,7 @@ public final class TextFieldMapper extends FieldMapper {
this.freqFilter = builder.freqFilter.getValue(); this.freqFilter = builder.freqFilter.getValue();
this.fieldData = builder.fieldData.get(); this.fieldData = builder.fieldData.get();
this.isSyntheticSourceEnabled = builder.isSyntheticSourceEnabled; this.isSyntheticSourceEnabled = builder.isSyntheticSourceEnabled;
this.isWithinMultiField = builder.withinMultiField;
} }
@Override @Override
@ -1360,7 +1383,7 @@ public final class TextFieldMapper extends FieldMapper {
@Override @Override
public FieldMapper.Builder getMergeBuilder() { public FieldMapper.Builder getMergeBuilder() {
return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, isSyntheticSourceEnabled).init(this); return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, isSyntheticSourceEnabled, isWithinMultiField).init(this);
} }
@Override @Override

View file

@ -307,6 +307,73 @@ public class TextFieldMapperTests extends MapperTestCase {
} }
} }
/**
 * With synthetic source enabled and no multi-fields configured, a {@code text} field is stored.
 */
public void testStoreParameterDefaultsSyntheticSource() throws IOException {
    var settings = getIndexSettingsBuilder().put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic").build();
    var mappings = mapping(builder -> builder.startObject("name").field("type", "text").endObject());

    DocumentMapper documentMapper = createMapperService(settings, mappings).documentMapper();
    ParsedDocument parsed = documentMapper.parse(source(builder -> builder.field("name", "quick brown fox")));

    IndexableFieldType nameType = parsed.rootDoc().getFields("name").get(0).fieldType();
    assertThat(nameType.stored(), is(true));
}
/**
 * With synthetic source enabled and a {@code keyword} multi-field under a {@code text} field,
 * the {@code text} field is not stored.
 */
public void testStoreParameterDefaultsSyntheticSourceWithKeywordMultiField() throws IOException {
    var settings = getIndexSettingsBuilder().put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic").build();
    var mappings = mapping(builder -> {
        builder.startObject("name").field("type", "text");
        builder.startObject("fields").startObject("keyword").field("type", "keyword").endObject().endObject();
        builder.endObject();
    });

    DocumentMapper documentMapper = createMapperService(settings, mappings).documentMapper();
    ParsedDocument parsed = documentMapper.parse(source(builder -> builder.field("name", "quick brown fox")));

    IndexableFieldType nameType = parsed.rootDoc().getFields("name").get(0).fieldType();
    assertThat(nameType.stored(), is(false));
}
/**
 * With synthetic source enabled and a {@code text} field declared as a multi-field of a
 * {@code keyword} field, the {@code name.text} field is not stored.
 */
public void testStoreParameterDefaultsSyntheticSourceTextFieldIsMultiField() throws IOException {
    var settings = getIndexSettingsBuilder().put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic").build();
    var mappings = mapping(builder -> {
        builder.startObject("name").field("type", "keyword");
        builder.startObject("fields").startObject("text").field("type", "text").endObject().endObject();
        builder.endObject();
    });

    DocumentMapper documentMapper = createMapperService(settings, mappings).documentMapper();
    ParsedDocument parsed = documentMapper.parse(source(builder -> builder.field("name", "quick brown fox")));

    IndexableFieldType multiFieldType = parsed.rootDoc().getFields("name.text").get(0).fieldType();
    assertThat(multiFieldType.stored(), is(false));
}
public void testBWCSerialization() throws IOException { public void testBWCSerialization() throws IOException {
MapperService mapperService = createMapperService(fieldMapping(b -> { MapperService mapperService = createMapperService(fieldMapping(b -> {
b.field("type", "text"); b.field("type", "text");