Implement synthetic source support for annotated text field (#107735)

This PR adds synthetic source support for annotated_text fields. Existing implementation for text is reused including test infrastructure so the majority of the change is moving and making things accessible. Contributes to #106460, #78744.
2025-06-28 17:34:17 -04:00 · 2024-04-25 10:31:27 -07:00 · 2024-04-25 10:31:27 -07:00 · e1d902d33b
commit e1d902d33b
parent 4ef8b3825e
16 changed files with 824 additions and 300 deletions
--- a/plugins/mapper-annotated-text/src/main/java/module-info.java
+++ b/plugins/mapper-annotated-text/src/main/java/module-info.java
@ -0,0 +1,19 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+module org.elasticsearch.index.mapper.annotatedtext {
+    requires org.elasticsearch.base;
+    requires org.elasticsearch.server;
+    requires org.elasticsearch.xcontent;
+    requires org.apache.lucene.core;
+    requires org.apache.lucene.highlighter;
+
+    // exports nothing
+
+    provides org.elasticsearch.features.FeatureSpecification with org.elasticsearch.index.mapper.annotatedtext.Features;
+}
--- a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java
+++ b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java
@ -21,17 +21,22 @@ import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.index.IndexOptions;
 import org.elasticsearch.ElasticsearchParseException;
+import org.elasticsearch.features.NodeFeature;
 import org.elasticsearch.index.IndexVersion;
 import org.elasticsearch.index.analysis.AnalyzerScope;
 import org.elasticsearch.index.analysis.IndexAnalyzers;
 import org.elasticsearch.index.analysis.NamedAnalyzer;
 import org.elasticsearch.index.mapper.DocumentParserContext;
 import org.elasticsearch.index.mapper.FieldMapper;
+import org.elasticsearch.index.mapper.KeywordFieldMapper;
 import org.elasticsearch.index.mapper.MapperBuilderContext;
+import org.elasticsearch.index.mapper.SourceLoader;
+import org.elasticsearch.index.mapper.StringStoredFieldFieldLoader;
 import org.elasticsearch.index.mapper.TextFieldMapper;
 import org.elasticsearch.index.mapper.TextParams;
 import org.elasticsearch.index.mapper.TextSearchInfo;
 import org.elasticsearch.index.similarity.SimilarityProvider;
+import org.elasticsearch.xcontent.XContentBuilder;

 import java.io.IOException;
 import java.io.Reader;
@ -41,6 +46,7 @@ import java.net.URLDecoder;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@ -58,6 +64,8 @@ import java.util.regex.Pattern;
 **/
 public class AnnotatedTextFieldMapper extends FieldMapper {

+    public static final NodeFeature SYNTHETIC_SOURCE_SUPPORT = new NodeFeature("mapper.annotated_text.synthetic_source");
+
    public static final String CONTENT_TYPE = "annotated_text";

    private static Builder builder(FieldMapper in) {
@ -114,7 +122,7 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
                meta };
        }

-        private AnnotatedTextFieldType buildFieldType(FieldType fieldType, MapperBuilderContext context) {
+        private AnnotatedTextFieldType buildFieldType(FieldType fieldType, MapperBuilderContext context, MultiFields multiFields) {
            TextSearchInfo tsi = new TextSearchInfo(
                fieldType,
                similarity.get(),
@ -126,12 +134,14 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
                store.getValue(),
                tsi,
                context.isSourceSynthetic(),
+                TextFieldMapper.SyntheticSourceHelper.syntheticSourceDelegate(fieldType, multiFields),
                meta.getValue()
            );
        }

        @Override
        public AnnotatedTextFieldMapper build(MapperBuilderContext context) {
+            MultiFields multiFields = multiFieldsBuilder.build(this, context);
            FieldType fieldType = TextParams.buildFieldType(() -> true, store, indexOptions, norms, termVectors);
            if (fieldType.indexOptions() == IndexOptions.NONE) {
                throw new IllegalArgumentException("[" + CONTENT_TYPE + "] fields must be indexed");
@ -146,8 +156,8 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
            return new AnnotatedTextFieldMapper(
                name(),
                fieldType,
-                buildFieldType(fieldType, context),
-                multiFieldsBuilder.build(this, context),
+                buildFieldType(fieldType, context, multiFields),
+                multiFields,
                copyTo,
                this
            );
@ -472,15 +482,15 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
    }

    public static final class AnnotatedTextFieldType extends TextFieldMapper.TextFieldType {
-
        private AnnotatedTextFieldType(
            String name,
            boolean store,
            TextSearchInfo tsi,
            boolean isSyntheticSource,
+            KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate,
            Map<String, String> meta
        ) {
-            super(name, true, store, tsi, isSyntheticSource, null, meta, false, false);
+            super(name, true, store, tsi, isSyntheticSource, syntheticSourceDelegate, meta, false, false);
        }

        public AnnotatedTextFieldType(String name, Map<String, String> meta) {
@ -544,4 +554,36 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
    public FieldMapper.Builder getMergeBuilder() {
        return new Builder(simpleName(), builder.indexCreatedVersion, builder.analyzers.indexAnalyzers).init(this);
    }
+
+    @Override
+    public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() {
+        if (copyTo.copyToFields().isEmpty() != true) {
+            throw new IllegalArgumentException(
+                "field [" + name() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares copy_to"
+            );
+        }
+        if (fieldType.stored()) {
+            return new StringStoredFieldFieldLoader(name(), simpleName(), null) {
+                @Override
+                protected void write(XContentBuilder b, Object value) throws IOException {
+                    b.value((String) value);
+                }
+            };
+        }
+
+        var kwd = TextFieldMapper.SyntheticSourceHelper.getKeywordFieldMapperForSyntheticSource(this);
+        if (kwd != null) {
+            return kwd.syntheticFieldLoader(simpleName());
+        }
+
+        throw new IllegalArgumentException(
+            String.format(
+                Locale.ROOT,
+                "field [%s] of type [%s] doesn't support synthetic source unless it is stored or has a sub-field of"
+                    + " type [keyword] with doc values or stored and without a normalizer",
+                name(),
+                typeName()
+            )
+        );
+    }
 }
--- a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/Features.java
+++ b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/Features.java
@ -0,0 +1,26 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.index.mapper.annotatedtext;
+
+import org.elasticsearch.features.FeatureSpecification;
+import org.elasticsearch.features.NodeFeature;
+
+import java.util.Set;
+
+/**
+ * Provides features for annotated text mapper.
+ */
+public class Features implements FeatureSpecification {
+    @Override
+    public Set<NodeFeature> getFeatures() {
+        return Set.of(
+            AnnotatedTextFieldMapper.SYNTHETIC_SOURCE_SUPPORT // Added in 8.15
+        );
+    }
+}
--- a/plugins/mapper-annotated-text/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification
+++ b/plugins/mapper-annotated-text/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification
@ -0,0 +1,9 @@
+#
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License
+# 2.0 and the Server Side Public License, v 1; you may not use this file except
+# in compliance with, at your election, the Elastic License 2.0 or the Server
+# Side Public License, v 1.
+#
+
+org.elasticsearch.index.mapper.annotatedtext.Features
--- a/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapperTests.java
+++ b/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapperTests.java
@ -14,6 +14,7 @@ import org.apache.lucene.analysis.core.KeywordAnalyzer;
 import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.analysis.en.EnglishAnalyzer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexableField;
@ -29,6 +30,7 @@ import org.elasticsearch.index.analysis.AnalyzerScope;
 import org.elasticsearch.index.analysis.CharFilterFactory;
 import org.elasticsearch.index.analysis.CustomAnalyzer;
 import org.elasticsearch.index.analysis.IndexAnalyzers;
+import org.elasticsearch.index.analysis.LowercaseNormalizer;
 import org.elasticsearch.index.analysis.NamedAnalyzer;
 import org.elasticsearch.index.analysis.StandardTokenizerFactory;
 import org.elasticsearch.index.analysis.TokenFilterFactory;
@ -38,6 +40,7 @@ import org.elasticsearch.index.mapper.MapperParsingException;
 import org.elasticsearch.index.mapper.MapperService;
 import org.elasticsearch.index.mapper.MapperTestCase;
 import org.elasticsearch.index.mapper.ParsedDocument;
+import org.elasticsearch.index.mapper.TextFieldFamilySyntheticSourceTestSetup;
 import org.elasticsearch.index.mapper.TextFieldMapper;
 import org.elasticsearch.plugins.Plugin;
 import org.elasticsearch.xcontent.ToXContent;
@ -54,6 +57,7 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.function.Function;

 import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.equalTo;
@ -144,7 +148,8 @@ public class AnnotatedTextFieldMapperTests extends MapperTestCase {
            )
        );
        return IndexAnalyzers.of(
-            Map.of("default", dflt, "standard", standard, "keyword", keyword, "whitespace", whitespace, "my_stop_analyzer", stop)
+            Map.of("default", dflt, "standard", standard, "keyword", keyword, "whitespace", whitespace, "my_stop_analyzer", stop),
+            Map.of("lowercase", new NamedAnalyzer("lowercase", AnalyzerScope.INDEX, new LowercaseNormalizer()))
        );
    }

@ -595,7 +600,23 @@ public class AnnotatedTextFieldMapperTests extends MapperTestCase {

    @Override
    protected SyntheticSourceSupport syntheticSourceSupport(boolean ignoreMalformed) {
-        throw new AssumptionViolatedException("not supported");
+        assumeFalse("ignore_malformed not supported", ignoreMalformed);
+        return TextFieldFamilySyntheticSourceTestSetup.syntheticSourceSupport("annotated_text", false);
+    }
+
+    @Override
+    protected BlockReaderSupport getSupportedReaders(MapperService mapper, String loaderFieldName) {
+        return TextFieldFamilySyntheticSourceTestSetup.getSupportedReaders(mapper, loaderFieldName);
+    }
+
+    @Override
+    protected Function<Object, Object> loadBlockExpected(BlockReaderSupport blockReaderSupport, boolean columnReader) {
+        return TextFieldFamilySyntheticSourceTestSetup.loadBlockExpected(blockReaderSupport, columnReader);
+    }
+
+    @Override
+    protected void validateRoundTripReader(String syntheticSource, DirectoryReader reader, DirectoryReader roundTripReader) {
+        TextFieldFamilySyntheticSourceTestSetup.validateRoundTripReader(syntheticSource, reader, roundTripReader);
    }

    @Override
--- a/plugins/mapper-annotated-text/src/yamlRestTest/resources/rest-api-spec/test/mapper_annotatedtext/20_synthetic_source.yml
+++ b/plugins/mapper-annotated-text/src/yamlRestTest/resources/rest-api-spec/test/mapper_annotatedtext/20_synthetic_source.yml
@ -0,0 +1,197 @@
+---
+setup:
+  - requires:
+      cluster_features: ["mapper.annotated_text.synthetic_source"]
+      reason: introduced in 8.15.0
+
+---
+stored annotated_text field:
+  - do:
+      indices.create:
+        index: test
+        body:
+          mappings:
+            _source:
+              mode: synthetic
+            properties:
+              annotated_text:
+                type: annotated_text
+                store: true
+
+  - do:
+      index:
+        index: test
+        id: 1
+        refresh: true
+        body:
+          annotated_text: the quick brown fox
+
+  - do:
+      search:
+        index: test
+
+  - match:
+      hits.hits.0._source:
+        annotated_text: the quick brown fox
+
+---
+annotated_text field with keyword multi-field:
+  - do:
+      indices.create:
+        index: test
+        body:
+          mappings:
+            _source:
+              mode: synthetic
+            properties:
+              annotated_text:
+                type: annotated_text
+                fields:
+                  keyword:
+                    type: keyword
+
+  - do:
+      index:
+        index: test
+        id: 1
+        refresh: true
+        body:
+          annotated_text: the quick brown fox
+
+  - do:
+      search:
+        index: test
+
+  - match:
+      hits.hits.0._source:
+        annotated_text: the quick brown fox
+
+---
+multiple values in stored annotated_text field:
+  - do:
+      indices.create:
+        index: test
+        body:
+          mappings:
+            _source:
+              mode: synthetic
+            properties:
+              annotated_text:
+                type: annotated_text
+                store: true
+
+  - do:
+      index:
+        index: test
+        id: 1
+        refresh: true
+        body:
+          annotated_text: ["world", "hello", "world"]
+
+  - do:
+      search:
+        index: test
+
+  - match:
+      hits.hits.0._source:
+        annotated_text: ["world", "hello", "world"]
+
+---
+multiple values in annotated_text field with keyword multi-field:
+  - do:
+      indices.create:
+        index: test
+        body:
+          mappings:
+            _source:
+              mode: synthetic
+            properties:
+              annotated_text:
+                type: annotated_text
+                fields:
+                  keyword:
+                    type: keyword
+
+  - do:
+      index:
+        index: test
+        id: 1
+        refresh: true
+        body:
+          annotated_text: ["world", "hello", "world"]
+
+  - do:
+      search:
+        index: test
+
+  - match:
+      hits.hits.0._source:
+        annotated_text: ["hello", "world"]
+
+
+---
+multiple values in annotated_text field with stored keyword multi-field:
+  - do:
+      indices.create:
+        index: test
+        body:
+          mappings:
+            _source:
+              mode: synthetic
+            properties:
+              annotated_text:
+                type: annotated_text
+                fields:
+                  keyword:
+                    type: keyword
+                    store: true
+                    doc_values: false
+
+  - do:
+      index:
+        index: test
+        id: 1
+        refresh: true
+        body:
+          annotated_text: ["world", "hello", "world"]
+
+  - do:
+      search:
+        index: test
+
+  - match:
+      hits.hits.0._source:
+        annotated_text: ["world", "hello", "world"]
+
+---
+multiple values in stored annotated_text field with keyword multi-field:
+  - do:
+      indices.create:
+        index: test
+        body:
+          mappings:
+            _source:
+              mode: synthetic
+            properties:
+              annotated_text:
+                type: annotated_text
+                store: true
+                fields:
+                  keyword:
+                    type: keyword
+
+  - do:
+      index:
+        index: test
+        id: 1
+        refresh: true
+        body:
+          annotated_text: ["world", "hello", "world"]
+
+  - do:
+      search:
+        index: test
+
+  - match:
+      hits.hits.0._source:
+        annotated_text: ["world", "hello", "world"]