mirror of
https://github.com/elastic/elasticsearch.git
synced 2025-06-28 17:34:17 -04:00
Implement synthetic source support for annotated text field (#107735)
This PR adds synthetic source support for annotated_text fields. The existing implementation for text fields is reused, including its test infrastructure, so most of the change consists of moving code and making it accessible. Contributes to #106460, #78744.
This commit is contained in:
parent
4ef8b3825e
commit
e1d902d33b
16 changed files with 824 additions and 300 deletions
19
plugins/mapper-annotated-text/src/main/java/module-info.java
Normal file
19
plugins/mapper-annotated-text/src/main/java/module-info.java
Normal file
|
@ -0,0 +1,19 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0 and the Server Side Public License, v 1; you may not use this file except
|
||||
* in compliance with, at your election, the Elastic License 2.0 or the Server
|
||||
* Side Public License, v 1.
|
||||
*/
|
||||
|
||||
// Module descriptor for the mapper-annotated-text plugin: declares the modules it
// reads and the service implementation it contributes.
module org.elasticsearch.index.mapper.annotatedtext {
|
||||
requires org.elasticsearch.base;
|
||||
requires org.elasticsearch.server;
|
||||
requires org.elasticsearch.xcontent;
|
||||
requires org.apache.lucene.core;
|
||||
requires org.apache.lucene.highlighter;
|
||||
|
||||
// exports nothing
|
||||
|
||||
// Registers Features as this module's FeatureSpecification service provider.
provides org.elasticsearch.features.FeatureSpecification with org.elasticsearch.index.mapper.annotatedtext.Features;
|
||||
}
|
|
@ -21,17 +21,22 @@ import org.apache.lucene.document.Field;
|
|||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.elasticsearch.ElasticsearchParseException;
|
||||
import org.elasticsearch.features.NodeFeature;
|
||||
import org.elasticsearch.index.IndexVersion;
|
||||
import org.elasticsearch.index.analysis.AnalyzerScope;
|
||||
import org.elasticsearch.index.analysis.IndexAnalyzers;
|
||||
import org.elasticsearch.index.analysis.NamedAnalyzer;
|
||||
import org.elasticsearch.index.mapper.DocumentParserContext;
|
||||
import org.elasticsearch.index.mapper.FieldMapper;
|
||||
import org.elasticsearch.index.mapper.KeywordFieldMapper;
|
||||
import org.elasticsearch.index.mapper.MapperBuilderContext;
|
||||
import org.elasticsearch.index.mapper.SourceLoader;
|
||||
import org.elasticsearch.index.mapper.StringStoredFieldFieldLoader;
|
||||
import org.elasticsearch.index.mapper.TextFieldMapper;
|
||||
import org.elasticsearch.index.mapper.TextParams;
|
||||
import org.elasticsearch.index.mapper.TextSearchInfo;
|
||||
import org.elasticsearch.index.similarity.SimilarityProvider;
|
||||
import org.elasticsearch.xcontent.XContentBuilder;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
@ -41,6 +46,7 @@ import java.net.URLDecoder;
|
|||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
@ -58,6 +64,8 @@ import java.util.regex.Pattern;
|
|||
**/
|
||||
public class AnnotatedTextFieldMapper extends FieldMapper {
|
||||
|
||||
public static final NodeFeature SYNTHETIC_SOURCE_SUPPORT = new NodeFeature("mapper.annotated_text.synthetic_source");
|
||||
|
||||
public static final String CONTENT_TYPE = "annotated_text";
|
||||
|
||||
private static Builder builder(FieldMapper in) {
|
||||
|
@ -114,7 +122,7 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
|
|||
meta };
|
||||
}
|
||||
|
||||
private AnnotatedTextFieldType buildFieldType(FieldType fieldType, MapperBuilderContext context) {
|
||||
private AnnotatedTextFieldType buildFieldType(FieldType fieldType, MapperBuilderContext context, MultiFields multiFields) {
|
||||
TextSearchInfo tsi = new TextSearchInfo(
|
||||
fieldType,
|
||||
similarity.get(),
|
||||
|
@ -126,12 +134,14 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
|
|||
store.getValue(),
|
||||
tsi,
|
||||
context.isSourceSynthetic(),
|
||||
TextFieldMapper.SyntheticSourceHelper.syntheticSourceDelegate(fieldType, multiFields),
|
||||
meta.getValue()
|
||||
);
|
||||
}
|
||||
|
||||
@Override
|
||||
public AnnotatedTextFieldMapper build(MapperBuilderContext context) {
|
||||
MultiFields multiFields = multiFieldsBuilder.build(this, context);
|
||||
FieldType fieldType = TextParams.buildFieldType(() -> true, store, indexOptions, norms, termVectors);
|
||||
if (fieldType.indexOptions() == IndexOptions.NONE) {
|
||||
throw new IllegalArgumentException("[" + CONTENT_TYPE + "] fields must be indexed");
|
||||
|
@ -146,8 +156,8 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
|
|||
return new AnnotatedTextFieldMapper(
|
||||
name(),
|
||||
fieldType,
|
||||
buildFieldType(fieldType, context),
|
||||
multiFieldsBuilder.build(this, context),
|
||||
buildFieldType(fieldType, context, multiFields),
|
||||
multiFields,
|
||||
copyTo,
|
||||
this
|
||||
);
|
||||
|
@ -472,15 +482,15 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
|
|||
}
|
||||
|
||||
public static final class AnnotatedTextFieldType extends TextFieldMapper.TextFieldType {
|
||||
|
||||
private AnnotatedTextFieldType(
|
||||
String name,
|
||||
boolean store,
|
||||
TextSearchInfo tsi,
|
||||
boolean isSyntheticSource,
|
||||
KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate,
|
||||
Map<String, String> meta
|
||||
) {
|
||||
super(name, true, store, tsi, isSyntheticSource, null, meta, false, false);
|
||||
super(name, true, store, tsi, isSyntheticSource, syntheticSourceDelegate, meta, false, false);
|
||||
}
|
||||
|
||||
public AnnotatedTextFieldType(String name, Map<String, String> meta) {
|
||||
|
@ -544,4 +554,36 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
|
|||
public FieldMapper.Builder getMergeBuilder() {
|
||||
return new Builder(simpleName(), builder.indexCreatedVersion, builder.analyzers.indexAnalyzers).init(this);
|
||||
}
|
||||
|
||||
/**
 * Builds the loader used to reconstruct this field's value for synthetic source.
 * <p>
 * Fields that declare copy_to are rejected. If the field itself is stored, values are
 * read back through a StringStoredFieldFieldLoader that writes each value as a string.
 * Otherwise the loader of the keyword mapper returned by
 * TextFieldMapper.SyntheticSourceHelper.getKeywordFieldMapperForSyntheticSource is used,
 * reporting values under this field's simple name.
 *
 * @return the synthetic field loader for this field
 * @throws IllegalArgumentException if the field declares copy_to, or if it is not stored
 *         and no keyword mapper is available for synthetic source
 */
@Override
|
||||
public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() {
|
||||
if (copyTo.copyToFields().isEmpty() != true) {
|
||||
throw new IllegalArgumentException(
|
||||
"field [" + name() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares copy_to"
|
||||
);
|
||||
}
|
||||
if (fieldType.stored()) {
|
||||
return new StringStoredFieldFieldLoader(name(), simpleName(), null) {
|
||||
@Override
|
||||
protected void write(XContentBuilder b, Object value) throws IOException {
|
||||
b.value((String) value);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
var kwd = TextFieldMapper.SyntheticSourceHelper.getKeywordFieldMapperForSyntheticSource(this);
|
||||
if (kwd != null) {
|
||||
return kwd.syntheticFieldLoader(simpleName());
|
||||
}
|
||||
|
||||
throw new IllegalArgumentException(
|
||||
String.format(
|
||||
Locale.ROOT,
|
||||
"field [%s] of type [%s] doesn't support synthetic source unless it is stored or has a sub-field of"
|
||||
+ " type [keyword] with doc values or stored and without a normalizer",
|
||||
name(),
|
||||
typeName()
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,26 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0 and the Server Side Public License, v 1; you may not use this file except
|
||||
* in compliance with, at your election, the Elastic License 2.0 or the Server
|
||||
* Side Public License, v 1.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.mapper.annotatedtext;
|
||||
|
||||
import org.elasticsearch.features.FeatureSpecification;
|
||||
import org.elasticsearch.features.NodeFeature;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Declares the node features contributed by the annotated text mapper; currently the only one is {@link AnnotatedTextFieldMapper#SYNTHETIC_SOURCE_SUPPORT}.
|
||||
*/
|
||||
public class Features implements FeatureSpecification {
|
||||
@Override
|
||||
public Set<NodeFeature> getFeatures() {
|
||||
return Set.of(
|
||||
AnnotatedTextFieldMapper.SYNTHETIC_SOURCE_SUPPORT // Added in 8.15
|
||||
);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,9 @@
|
|||
#
|
||||
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
# or more contributor license agreements. Licensed under the Elastic License
|
||||
# 2.0 and the Server Side Public License, v 1; you may not use this file except
|
||||
# in compliance with, at your election, the Elastic License 2.0 or the Server
|
||||
# Side Public License, v 1.
|
||||
#
|
||||
|
||||
org.elasticsearch.index.mapper.annotatedtext.Features
|
|
@ -14,6 +14,7 @@ import org.apache.lucene.analysis.core.KeywordAnalyzer;
|
|||
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
|
||||
import org.apache.lucene.analysis.en.EnglishAnalyzer;
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
|
@ -29,6 +30,7 @@ import org.elasticsearch.index.analysis.AnalyzerScope;
|
|||
import org.elasticsearch.index.analysis.CharFilterFactory;
|
||||
import org.elasticsearch.index.analysis.CustomAnalyzer;
|
||||
import org.elasticsearch.index.analysis.IndexAnalyzers;
|
||||
import org.elasticsearch.index.analysis.LowercaseNormalizer;
|
||||
import org.elasticsearch.index.analysis.NamedAnalyzer;
|
||||
import org.elasticsearch.index.analysis.StandardTokenizerFactory;
|
||||
import org.elasticsearch.index.analysis.TokenFilterFactory;
|
||||
|
@ -38,6 +40,7 @@ import org.elasticsearch.index.mapper.MapperParsingException;
|
|||
import org.elasticsearch.index.mapper.MapperService;
|
||||
import org.elasticsearch.index.mapper.MapperTestCase;
|
||||
import org.elasticsearch.index.mapper.ParsedDocument;
|
||||
import org.elasticsearch.index.mapper.TextFieldFamilySyntheticSourceTestSetup;
|
||||
import org.elasticsearch.index.mapper.TextFieldMapper;
|
||||
import org.elasticsearch.plugins.Plugin;
|
||||
import org.elasticsearch.xcontent.ToXContent;
|
||||
|
@ -54,6 +57,7 @@ import java.util.HashSet;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.function.Function;
|
||||
|
||||
import static org.hamcrest.Matchers.containsString;
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
|
@ -144,7 +148,8 @@ public class AnnotatedTextFieldMapperTests extends MapperTestCase {
|
|||
)
|
||||
);
|
||||
return IndexAnalyzers.of(
|
||||
Map.of("default", dflt, "standard", standard, "keyword", keyword, "whitespace", whitespace, "my_stop_analyzer", stop)
|
||||
Map.of("default", dflt, "standard", standard, "keyword", keyword, "whitespace", whitespace, "my_stop_analyzer", stop),
|
||||
Map.of("lowercase", new NamedAnalyzer("lowercase", AnalyzerScope.INDEX, new LowercaseNormalizer()))
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -595,7 +600,23 @@ public class AnnotatedTextFieldMapperTests extends MapperTestCase {
|
|||
|
||||
@Override
|
||||
protected SyntheticSourceSupport syntheticSourceSupport(boolean ignoreMalformed) {
|
||||
throw new AssumptionViolatedException("not supported");
|
||||
assumeFalse("ignore_malformed not supported", ignoreMalformed);
|
||||
return TextFieldFamilySyntheticSourceTestSetup.syntheticSourceSupport("annotated_text", false);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected BlockReaderSupport getSupportedReaders(MapperService mapper, String loaderFieldName) {
|
||||
return TextFieldFamilySyntheticSourceTestSetup.getSupportedReaders(mapper, loaderFieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Function<Object, Object> loadBlockExpected(BlockReaderSupport blockReaderSupport, boolean columnReader) {
|
||||
return TextFieldFamilySyntheticSourceTestSetup.loadBlockExpected(blockReaderSupport, columnReader);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void validateRoundTripReader(String syntheticSource, DirectoryReader reader, DirectoryReader roundTripReader) {
|
||||
TextFieldFamilySyntheticSourceTestSetup.validateRoundTripReader(syntheticSource, reader, roundTripReader);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -0,0 +1,197 @@
|
|||
---
|
||||
setup:
|
||||
- requires:
|
||||
cluster_features: ["mapper.annotated_text.synthetic_source"]
|
||||
reason: introduced in 8.15.0
|
||||
|
||||
---
|
||||
stored annotated_text field:
|
||||
- do:
|
||||
indices.create:
|
||||
index: test
|
||||
body:
|
||||
mappings:
|
||||
_source:
|
||||
mode: synthetic
|
||||
properties:
|
||||
annotated_text:
|
||||
type: annotated_text
|
||||
store: true
|
||||
|
||||
- do:
|
||||
index:
|
||||
index: test
|
||||
id: 1
|
||||
refresh: true
|
||||
body:
|
||||
annotated_text: the quick brown fox
|
||||
|
||||
- do:
|
||||
search:
|
||||
index: test
|
||||
|
||||
- match:
|
||||
hits.hits.0._source:
|
||||
annotated_text: the quick brown fox
|
||||
|
||||
---
|
||||
annotated_text field with keyword multi-field:
|
||||
- do:
|
||||
indices.create:
|
||||
index: test
|
||||
body:
|
||||
mappings:
|
||||
_source:
|
||||
mode: synthetic
|
||||
properties:
|
||||
annotated_text:
|
||||
type: annotated_text
|
||||
fields:
|
||||
keyword:
|
||||
type: keyword
|
||||
|
||||
- do:
|
||||
index:
|
||||
index: test
|
||||
id: 1
|
||||
refresh: true
|
||||
body:
|
||||
annotated_text: the quick brown fox
|
||||
|
||||
- do:
|
||||
search:
|
||||
index: test
|
||||
|
||||
- match:
|
||||
hits.hits.0._source:
|
||||
annotated_text: the quick brown fox
|
||||
|
||||
---
|
||||
multiple values in stored annotated_text field:
|
||||
- do:
|
||||
indices.create:
|
||||
index: test
|
||||
body:
|
||||
mappings:
|
||||
_source:
|
||||
mode: synthetic
|
||||
properties:
|
||||
annotated_text:
|
||||
type: annotated_text
|
||||
store: true
|
||||
|
||||
- do:
|
||||
index:
|
||||
index: test
|
||||
id: 1
|
||||
refresh: true
|
||||
body:
|
||||
annotated_text: ["world", "hello", "world"]
|
||||
|
||||
- do:
|
||||
search:
|
||||
index: test
|
||||
|
||||
- match:
|
||||
hits.hits.0._source:
|
||||
annotated_text: ["world", "hello", "world"]
|
||||
|
||||
---
|
||||
multiple values in annotated_text field with keyword multi-field:
|
||||
- do:
|
||||
indices.create:
|
||||
index: test
|
||||
body:
|
||||
mappings:
|
||||
_source:
|
||||
mode: synthetic
|
||||
properties:
|
||||
annotated_text:
|
||||
type: annotated_text
|
||||
fields:
|
||||
keyword:
|
||||
type: keyword
|
||||
|
||||
- do:
|
||||
index:
|
||||
index: test
|
||||
id: 1
|
||||
refresh: true
|
||||
body:
|
||||
annotated_text: ["world", "hello", "world"]
|
||||
|
||||
- do:
|
||||
search:
|
||||
index: test
|
||||
|
||||
- match:
|
||||
hits.hits.0._source:
|
||||
annotated_text: ["hello", "world"]
|
||||
|
||||
|
||||
---
|
||||
multiple values in annotated_text field with stored keyword multi-field:
|
||||
- do:
|
||||
indices.create:
|
||||
index: test
|
||||
body:
|
||||
mappings:
|
||||
_source:
|
||||
mode: synthetic
|
||||
properties:
|
||||
annotated_text:
|
||||
type: annotated_text
|
||||
fields:
|
||||
keyword:
|
||||
type: keyword
|
||||
store: true
|
||||
doc_values: false
|
||||
|
||||
- do:
|
||||
index:
|
||||
index: test
|
||||
id: 1
|
||||
refresh: true
|
||||
body:
|
||||
annotated_text: ["world", "hello", "world"]
|
||||
|
||||
- do:
|
||||
search:
|
||||
index: test
|
||||
|
||||
- match:
|
||||
hits.hits.0._source:
|
||||
annotated_text: ["world", "hello", "world"]
|
||||
|
||||
---
|
||||
multiple values in stored annotated_text field with keyword multi-field:
|
||||
- do:
|
||||
indices.create:
|
||||
index: test
|
||||
body:
|
||||
mappings:
|
||||
_source:
|
||||
mode: synthetic
|
||||
properties:
|
||||
annotated_text:
|
||||
type: annotated_text
|
||||
store: true
|
||||
fields:
|
||||
keyword:
|
||||
type: keyword
|
||||
|
||||
- do:
|
||||
index:
|
||||
index: test
|
||||
id: 1
|
||||
refresh: true
|
||||
body:
|
||||
annotated_text: ["world", "hello", "world"]
|
||||
|
||||
- do:
|
||||
search:
|
||||
index: test
|
||||
|
||||
- match:
|
||||
hits.hits.0._source:
|
||||
annotated_text: ["world", "hello", "world"]
|
Loading…
Add table
Add a link
Reference in a new issue