mirror of
https://github.com/elastic/elasticsearch.git
synced 2025-06-28 01:22:26 -04:00
Synthetic source: avoid storing multi fields of type text and match_only_text by default. (#129126)
Don't store text and match_only_text fields by default when source mode is synthetic and the field is a multi field, or when the field has a suitable multi field. Without this change, ES would store the field value twice in a multi-field configuration. For example: ``` ... "os": { "properties": { "name": { "ignore_above": 1024, "type": "keyword", "fields": { "text": { "type": "match_only_text" } } } ... ``` In this case, two stored fields were added: one for the `name` field and one for the `name.text` multi-field. This change prevents that: no stored field is added when a text or match_only_text field is a multi-field.
This commit is contained in:
parent
42dec5b41f
commit
33af83a0ca
6 changed files with 214 additions and 23 deletions
6
docs/changelog/129126.yaml
Normal file
6
docs/changelog/129126.yaml
Normal file
|
@ -0,0 +1,6 @@
|
|||
pr: 129126
|
||||
summary: "Synthetic source: avoid storing multi fields of type text and `match_only_text`\
|
||||
\ by default"
|
||||
area: Mapping
|
||||
type: bug
|
||||
issues: []
|
|
@ -33,6 +33,7 @@ import org.elasticsearch.common.CheckedIntFunction;
|
|||
import org.elasticsearch.common.lucene.Lucene;
|
||||
import org.elasticsearch.common.unit.Fuzziness;
|
||||
import org.elasticsearch.index.IndexVersion;
|
||||
import org.elasticsearch.index.IndexVersions;
|
||||
import org.elasticsearch.index.analysis.IndexAnalyzers;
|
||||
import org.elasticsearch.index.analysis.NamedAnalyzer;
|
||||
import org.elasticsearch.index.fielddata.FieldDataContext;
|
||||
|
@ -101,12 +102,9 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
|
|||
private final Parameter<Map<String, String>> meta = Parameter.metaParam();
|
||||
|
||||
private final TextParams.Analyzers analyzers;
|
||||
private final boolean withinMultiField;
|
||||
|
||||
public Builder(String name, IndexAnalyzers indexAnalyzers) {
|
||||
this(name, IndexVersion.current(), indexAnalyzers);
|
||||
}
|
||||
|
||||
public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers) {
|
||||
public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers, boolean withinMultiField) {
|
||||
super(name);
|
||||
this.indexCreatedVersion = indexCreatedVersion;
|
||||
this.analyzers = new TextParams.Analyzers(
|
||||
|
@ -115,6 +113,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
|
|||
m -> ((MatchOnlyTextFieldMapper) m).positionIncrementGap,
|
||||
indexCreatedVersion
|
||||
);
|
||||
this.withinMultiField = withinMultiField;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -140,18 +139,21 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
|
|||
@Override
|
||||
public MatchOnlyTextFieldMapper build(MapperBuilderContext context) {
|
||||
MatchOnlyTextFieldType tft = buildFieldType(context);
|
||||
return new MatchOnlyTextFieldMapper(
|
||||
leafName(),
|
||||
Defaults.FIELD_TYPE,
|
||||
tft,
|
||||
builderParams(this, context),
|
||||
context.isSourceSynthetic(),
|
||||
this
|
||||
);
|
||||
final boolean storeSource;
|
||||
if (indexCreatedVersion.onOrAfter(IndexVersions.MAPPER_TEXT_MATCH_ONLY_MULTI_FIELDS_DEFAULT_NOT_STORED)) {
|
||||
storeSource = context.isSourceSynthetic()
|
||||
&& withinMultiField == false
|
||||
&& multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false;
|
||||
} else {
|
||||
storeSource = context.isSourceSynthetic();
|
||||
}
|
||||
return new MatchOnlyTextFieldMapper(leafName(), Defaults.FIELD_TYPE, tft, builderParams(this, context), storeSource, this);
|
||||
}
|
||||
}
|
||||
|
||||
public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers()));
|
||||
public static final TypeParser PARSER = new TypeParser(
|
||||
(n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers(), c.isWithinMultiField())
|
||||
);
|
||||
|
||||
public static class MatchOnlyTextFieldType extends StringFieldType {
|
||||
|
||||
|
@ -406,6 +408,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
|
|||
private final int positionIncrementGap;
|
||||
private final boolean storeSource;
|
||||
private final FieldType fieldType;
|
||||
private final boolean withinMultiField;
|
||||
|
||||
private MatchOnlyTextFieldMapper(
|
||||
String simpleName,
|
||||
|
@ -424,6 +427,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
|
|||
this.indexAnalyzer = builder.analyzers.getIndexAnalyzer();
|
||||
this.positionIncrementGap = builder.analyzers.positionIncrementGap.getValue();
|
||||
this.storeSource = storeSource;
|
||||
this.withinMultiField = builder.withinMultiField;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -433,7 +437,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
|
|||
|
||||
@Override
|
||||
public FieldMapper.Builder getMergeBuilder() {
|
||||
return new Builder(leafName(), indexCreatedVersion, indexAnalyzers).init(this);
|
||||
return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, withinMultiField).init(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -23,6 +23,7 @@ import org.apache.lucene.tests.analysis.Token;
|
|||
import org.apache.lucene.tests.index.RandomIndexWriter;
|
||||
import org.elasticsearch.common.Strings;
|
||||
import org.elasticsearch.core.Tuple;
|
||||
import org.elasticsearch.index.IndexSettings;
|
||||
import org.elasticsearch.index.mapper.DocumentMapper;
|
||||
import org.elasticsearch.index.mapper.KeywordFieldMapper;
|
||||
import org.elasticsearch.index.mapper.LuceneDocument;
|
||||
|
@ -46,8 +47,10 @@ import java.util.List;
|
|||
import java.util.stream.Collectors;
|
||||
|
||||
import static org.hamcrest.Matchers.containsString;
|
||||
import static org.hamcrest.Matchers.empty;
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
import static org.hamcrest.Matchers.instanceOf;
|
||||
import static org.hamcrest.core.Is.is;
|
||||
|
||||
public class MatchOnlyTextFieldMapperTests extends MapperTestCase {
|
||||
|
||||
|
@ -255,4 +258,91 @@ public class MatchOnlyTextFieldMapperTests extends MapperTestCase {
|
|||
protected IngestScriptSupport ingestScriptSupport() {
|
||||
throw new AssumptionViolatedException("not supported");
|
||||
}
|
||||
|
||||
// Synthetic source, plain match_only_text field with no multi fields:
// the indexed "name" field must not be stored, while the companion
// "name._original" field must exist and be stored.
public void testStoreParameterDefaultsSyntheticSource() throws IOException {
|
||||
var indexSettingsBuilder = getIndexSettingsBuilder();
|
||||
// Switch the index to synthetic source mode.
indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic");
|
||||
var indexSettings = indexSettingsBuilder.build();
|
||||
|
||||
var mapping = mapping(b -> {
|
||||
b.startObject("name");
|
||||
b.field("type", "match_only_text");
|
||||
b.endObject();
|
||||
});
|
||||
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();
|
||||
|
||||
var source = source(b -> b.field("name", "quick brown fox"));
|
||||
ParsedDocument doc = mapper.parse(source);
|
||||
|
||||
{
|
||||
List<IndexableField> fields = doc.rootDoc().getFields("name");
|
||||
IndexableFieldType fieldType = fields.get(0).fieldType();
|
||||
assertThat(fieldType.stored(), is(false));
|
||||
}
|
||||
{
|
||||
// NOTE(review): "name._original" is presumably the stored copy used to rebuild synthetic source - confirm.
List<IndexableField> fields = doc.rootDoc().getFields("name._original");
|
||||
IndexableFieldType fieldType = fields.get(0).fieldType();
|
||||
assertThat(fieldType.stored(), is(true));
|
||||
}
|
||||
}
|
||||
|
||||
// Synthetic source, match_only_text field that has a keyword multi field:
// the indexed "name" field must not be stored, and no "name._original"
// field may be added to the document at all.
public void testStoreParameterDefaultsSyntheticSourceWithKeywordMultiField() throws IOException {
|
||||
var indexSettingsBuilder = getIndexSettingsBuilder();
|
||||
// Switch the index to synthetic source mode.
indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic");
|
||||
var indexSettings = indexSettingsBuilder.build();
|
||||
|
||||
var mapping = mapping(b -> {
|
||||
b.startObject("name");
|
||||
b.field("type", "match_only_text");
|
||||
b.startObject("fields");
|
||||
b.startObject("keyword");
|
||||
b.field("type", "keyword");
|
||||
b.endObject();
|
||||
b.endObject();
|
||||
b.endObject();
|
||||
});
|
||||
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();
|
||||
|
||||
var source = source(b -> b.field("name", "quick brown fox"));
|
||||
ParsedDocument doc = mapper.parse(source);
|
||||
{
|
||||
List<IndexableField> fields = doc.rootDoc().getFields("name");
|
||||
IndexableFieldType fieldType = fields.get(0).fieldType();
|
||||
assertThat(fieldType.stored(), is(false));
|
||||
}
|
||||
{
|
||||
// With the keyword multi field present, no "_original" field is expected.
List<IndexableField> fields = doc.rootDoc().getFields("name._original");
|
||||
assertThat(fields, empty());
|
||||
}
|
||||
}
|
||||
|
||||
// Synthetic source, keyword parent field with a match_only_text multi field
// named "text": the indexed "name.text" field must not be stored, and no
// "name.text._original" field may be added to the document.
public void testStoreParameterDefaultsSyntheticSourceTextFieldIsMultiField() throws IOException {
|
||||
var indexSettingsBuilder = getIndexSettingsBuilder();
|
||||
// Switch the index to synthetic source mode.
indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic");
|
||||
var indexSettings = indexSettingsBuilder.build();
|
||||
|
||||
var mapping = mapping(b -> {
|
||||
b.startObject("name");
|
||||
b.field("type", "keyword");
|
||||
b.startObject("fields");
|
||||
b.startObject("text");
|
||||
b.field("type", "match_only_text");
|
||||
b.endObject();
|
||||
b.endObject();
|
||||
b.endObject();
|
||||
});
|
||||
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();
|
||||
|
||||
var source = source(b -> b.field("name", "quick brown fox"));
|
||||
ParsedDocument doc = mapper.parse(source);
|
||||
{
|
||||
List<IndexableField> fields = doc.rootDoc().getFields("name.text");
|
||||
IndexableFieldType fieldType = fields.get(0).fieldType();
|
||||
assertThat(fieldType.stored(), is(false));
|
||||
}
|
||||
{
|
||||
// The match_only_text mapper is itself a multi field here, so no "_original" field is expected.
List<IndexableField> fields = doc.rootDoc().getFields("name.text._original");
|
||||
assertThat(fields, empty());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -171,6 +171,7 @@ public class IndexVersions {
|
|||
public static final IndexVersion DEFAULT_TO_ACORN_HNSW_FILTER_HEURISTIC = def(9_026_0_00, Version.LUCENE_10_2_1);
|
||||
public static final IndexVersion SEQ_NO_WITHOUT_POINTS = def(9_027_0_00, Version.LUCENE_10_2_1);
|
||||
public static final IndexVersion INDEX_INT_SORT_INT_TYPE = def(9_028_0_00, Version.LUCENE_10_2_1);
|
||||
public static final IndexVersion MAPPER_TEXT_MATCH_ONLY_MULTI_FIELDS_DEFAULT_NOT_STORED = def(9_029_0_00, Version.LUCENE_10_2_1);
|
||||
|
||||
/*
|
||||
* STOP! READ THIS FIRST! No, really,
|
||||
|
|
|
@ -287,11 +287,19 @@ public final class TextFieldMapper extends FieldMapper {
|
|||
|
||||
final TextParams.Analyzers analyzers;
|
||||
|
||||
private final boolean withinMultiField;
|
||||
|
||||
public Builder(String name, IndexAnalyzers indexAnalyzers, boolean isSyntheticSourceEnabled) {
|
||||
this(name, IndexVersion.current(), indexAnalyzers, isSyntheticSourceEnabled);
|
||||
this(name, IndexVersion.current(), indexAnalyzers, isSyntheticSourceEnabled, false);
|
||||
}
|
||||
|
||||
public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers, boolean isSyntheticSourceEnabled) {
|
||||
public Builder(
|
||||
String name,
|
||||
IndexVersion indexCreatedVersion,
|
||||
IndexAnalyzers indexAnalyzers,
|
||||
boolean isSyntheticSourceEnabled,
|
||||
boolean withinMultiField
|
||||
) {
|
||||
super(name);
|
||||
|
||||
// If synthetic source is used we need to either store this field
|
||||
|
@ -300,10 +308,17 @@ public final class TextFieldMapper extends FieldMapper {
|
|||
// storing the field without requiring users to explicitly set 'store'.
|
||||
//
|
||||
// If 'store' parameter was explicitly provided we'll reject the request.
|
||||
this.store = Parameter.storeParam(
|
||||
m -> ((TextFieldMapper) m).store,
|
||||
() -> isSyntheticSourceEnabled && multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false
|
||||
);
|
||||
// Note that if current builder is a multi field, then we don't need to store, given that responsibility lies with parent field
|
||||
this.withinMultiField = withinMultiField;
|
||||
this.store = Parameter.storeParam(m -> ((TextFieldMapper) m).store, () -> {
|
||||
if (indexCreatedVersion.onOrAfter(IndexVersions.MAPPER_TEXT_MATCH_ONLY_MULTI_FIELDS_DEFAULT_NOT_STORED)) {
|
||||
return isSyntheticSourceEnabled
|
||||
&& this.withinMultiField == false
|
||||
&& multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false;
|
||||
} else {
|
||||
return isSyntheticSourceEnabled;
|
||||
}
|
||||
});
|
||||
this.indexCreatedVersion = indexCreatedVersion;
|
||||
this.analyzers = new TextParams.Analyzers(
|
||||
indexAnalyzers,
|
||||
|
@ -482,7 +497,13 @@ public final class TextFieldMapper extends FieldMapper {
|
|||
}
|
||||
|
||||
public static final TypeParser PARSER = createTypeParserWithLegacySupport(
|
||||
(n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers(), SourceFieldMapper.isSynthetic(c.getIndexSettings()))
|
||||
(n, c) -> new Builder(
|
||||
n,
|
||||
c.indexVersionCreated(),
|
||||
c.getIndexAnalyzers(),
|
||||
SourceFieldMapper.isSynthetic(c.getIndexSettings()),
|
||||
c.isWithinMultiField()
|
||||
)
|
||||
);
|
||||
|
||||
private static class PhraseWrappedAnalyzer extends AnalyzerWrapper {
|
||||
|
@ -1304,6 +1325,7 @@ public final class TextFieldMapper extends FieldMapper {
|
|||
private final SubFieldInfo phraseFieldInfo;
|
||||
|
||||
private final boolean isSyntheticSourceEnabled;
|
||||
private final boolean isWithinMultiField;
|
||||
|
||||
private TextFieldMapper(
|
||||
String simpleName,
|
||||
|
@ -1337,6 +1359,7 @@ public final class TextFieldMapper extends FieldMapper {
|
|||
this.freqFilter = builder.freqFilter.getValue();
|
||||
this.fieldData = builder.fieldData.get();
|
||||
this.isSyntheticSourceEnabled = builder.isSyntheticSourceEnabled;
|
||||
this.isWithinMultiField = builder.withinMultiField;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -1360,7 +1383,7 @@ public final class TextFieldMapper extends FieldMapper {
|
|||
|
||||
@Override
|
||||
public FieldMapper.Builder getMergeBuilder() {
|
||||
return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, isSyntheticSourceEnabled).init(this);
|
||||
return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, isSyntheticSourceEnabled, isWithinMultiField).init(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -307,6 +307,73 @@ public class TextFieldMapperTests extends MapperTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
// Synthetic source, plain text field with no multi fields:
// the "name" field is expected to be stored by default.
public void testStoreParameterDefaultsSyntheticSource() throws IOException {
|
||||
var indexSettingsBuilder = getIndexSettingsBuilder();
|
||||
// Switch the index to synthetic source mode.
indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic");
|
||||
var indexSettings = indexSettingsBuilder.build();
|
||||
|
||||
var mapping = mapping(b -> {
|
||||
b.startObject("name");
|
||||
b.field("type", "text");
|
||||
b.endObject();
|
||||
});
|
||||
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();
|
||||
|
||||
var source = source(b -> b.field("name", "quick brown fox"));
|
||||
ParsedDocument doc = mapper.parse(source);
|
||||
List<IndexableField> fields = doc.rootDoc().getFields("name");
|
||||
IndexableFieldType fieldType = fields.get(0).fieldType();
|
||||
assertThat(fieldType.stored(), is(true));
|
||||
}
|
||||
|
||||
// Synthetic source, text field that has a keyword multi field:
// the "name" text field is expected not to be stored by default.
public void testStoreParameterDefaultsSyntheticSourceWithKeywordMultiField() throws IOException {
|
||||
var indexSettingsBuilder = getIndexSettingsBuilder();
|
||||
// Switch the index to synthetic source mode.
indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic");
|
||||
var indexSettings = indexSettingsBuilder.build();
|
||||
|
||||
var mapping = mapping(b -> {
|
||||
b.startObject("name");
|
||||
b.field("type", "text");
|
||||
b.startObject("fields");
|
||||
b.startObject("keyword");
|
||||
b.field("type", "keyword");
|
||||
b.endObject();
|
||||
b.endObject();
|
||||
b.endObject();
|
||||
});
|
||||
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();
|
||||
|
||||
var source = source(b -> b.field("name", "quick brown fox"));
|
||||
ParsedDocument doc = mapper.parse(source);
|
||||
List<IndexableField> fields = doc.rootDoc().getFields("name");
|
||||
IndexableFieldType fieldType = fields.get(0).fieldType();
|
||||
assertThat(fieldType.stored(), is(false));
|
||||
}
|
||||
|
||||
// Synthetic source, keyword parent field with a text multi field named "text":
// the "name.text" field is expected not to be stored by default.
public void testStoreParameterDefaultsSyntheticSourceTextFieldIsMultiField() throws IOException {
|
||||
var indexSettingsBuilder = getIndexSettingsBuilder();
|
||||
// Switch the index to synthetic source mode.
indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic");
|
||||
var indexSettings = indexSettingsBuilder.build();
|
||||
|
||||
var mapping = mapping(b -> {
|
||||
b.startObject("name");
|
||||
b.field("type", "keyword");
|
||||
b.startObject("fields");
|
||||
b.startObject("text");
|
||||
b.field("type", "text");
|
||||
b.endObject();
|
||||
b.endObject();
|
||||
b.endObject();
|
||||
});
|
||||
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();
|
||||
|
||||
var source = source(b -> b.field("name", "quick brown fox"));
|
||||
ParsedDocument doc = mapper.parse(source);
|
||||
List<IndexableField> fields = doc.rootDoc().getFields("name.text");
|
||||
IndexableFieldType fieldType = fields.get(0).fieldType();
|
||||
assertThat(fieldType.stored(), is(false));
|
||||
}
|
||||
|
||||
public void testBWCSerialization() throws IOException {
|
||||
MapperService mapperService = createMapperService(fieldMapping(b -> {
|
||||
b.field("type", "text");
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue