Adding support to exclude semantic_text subfields (#127664)

* Adding support to exclude semantic_text subfields

* Update docs/changelog/127664.yaml

* Updating changelog file

* remove duplicate test from yaml file

* Adding support to exclude semantic_text subfields from mapper builders

* Adding support for generic field types

* refactoring to use builder and setting exclude value from semantic_text mapper

* update in semantic_text mapper and fetcher to incorporate the support functionality

* Fix code style issue

* adding node feature for yaml tests

* Adding more restrictive checks on yaml tests and few refactoring

* Returns metadata fields from metadata mappers

* returns all source fields for fieldcaps

* gather all fields and iterate to process for fieldcaps api

* revert back all changes from MappedFieldtype and subclasses

* revert back exclude logic from semantic_text mapper

* fix lint issues

* fix lint issues

* Adding runtime fields into fieldCaps

* Fix linting issue

* removing unused functions that were used in previous implementation

* fix multifield tests failure

* getting alias fields for field caps

* adding support for query time runtime fields

* [CI] Auto commit changes from spotless

* Fix empty mapping fieldCaps call

* Address passthrough behavior for mappers

* Fix SearchAsYouType mapper failures

* rename abstract method to have more meaningful name

* Rename mapper function to match its functionality

* Adding filtering for inference subfields

* revert back previous implementation changes

* Adding yaml test for field caps not filtering multi-field

* Fixing yaml test

* Adding comment why .inference filter is added

---------

Co-authored-by: elasticsearchmachine <infra-root+elasticsearchmachine@elastic.co>
Co-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com>
This commit is contained in:
Samiul Monir 2025-06-11 14:31:11 -04:00 committed by GitHub
parent 6370d600b0
commit b50bb6b5fb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 117 additions and 1 deletions

View file

@ -0,0 +1,5 @@
pr: 127664
summary: Exclude `semantic_text` subfields from field capabilities API
area: "Mapping"
type: enhancement
issues: []

View file

@ -9,6 +9,7 @@
package org.elasticsearch.action.fieldcaps; package org.elasticsearch.action.fieldcaps;
import org.elasticsearch.cluster.metadata.InferenceFieldMetadata;
import org.elasticsearch.cluster.metadata.MappingMetadata; import org.elasticsearch.cluster.metadata.MappingMetadata;
import org.elasticsearch.core.Booleans; import org.elasticsearch.core.Booleans;
import org.elasticsearch.core.Nullable; import org.elasticsearch.core.Nullable;
@ -30,6 +31,7 @@ import org.elasticsearch.search.internal.ShardSearchRequest;
import org.elasticsearch.tasks.CancellableTask; import org.elasticsearch.tasks.CancellableTask;
import java.io.IOException; import java.io.IOException;
import java.util.Collection;
import java.util.Collections; import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
@ -256,6 +258,14 @@ class FieldCapabilitiesFetcher {
Set<String> acceptedTypes = Set.of(fieldTypes); Set<String> acceptedTypes = Set.of(fieldTypes);
fcf = ft -> acceptedTypes.contains(ft.familyTypeName()); fcf = ft -> acceptedTypes.contains(ft.familyTypeName());
} }
// Exclude internal ".inference" subfields of semantic_text fields from the field capabilities response
Collection<InferenceFieldMetadata> inferenceFields = context.getMappingLookup().inferenceFields().values();
for (InferenceFieldMetadata inferenceField : inferenceFields) {
Predicate<MappedFieldType> next = ft -> ft.name().startsWith(inferenceField.getName() + ".inference") == false;
fcf = fcf == null ? next : fcf.and(next);
}
for (String filter : filters) { for (String filter : filters) {
if ("parent".equals(filter) || "-parent".equals(filter)) { if ("parent".equals(filter) || "-parent".equals(filter)) {
continue; continue;

View file

@ -15,6 +15,7 @@ import org.elasticsearch.xpack.inference.rank.textsimilarity.TextSimilarityRankR
import java.util.Set; import java.util.Set;
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.SEMANTIC_TEXT_EXCLUDE_SUB_FIELDS_FROM_FIELD_CAPS;
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.SEMANTIC_TEXT_SUPPORT_CHUNKING_CONFIG; import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.SEMANTIC_TEXT_SUPPORT_CHUNKING_CONFIG;
import static org.elasticsearch.xpack.inference.queries.SemanticKnnVectorQueryRewriteInterceptor.SEMANTIC_KNN_FILTER_FIX; import static org.elasticsearch.xpack.inference.queries.SemanticKnnVectorQueryRewriteInterceptor.SEMANTIC_KNN_FILTER_FIX;
import static org.elasticsearch.xpack.inference.queries.SemanticKnnVectorQueryRewriteInterceptor.SEMANTIC_KNN_VECTOR_QUERY_REWRITE_INTERCEPTION_SUPPORTED; import static org.elasticsearch.xpack.inference.queries.SemanticKnnVectorQueryRewriteInterceptor.SEMANTIC_KNN_VECTOR_QUERY_REWRITE_INTERCEPTION_SUPPORTED;
@ -59,7 +60,8 @@ public class InferenceFeatures implements FeatureSpecification {
SemanticTextFieldMapper.SEMANTIC_TEXT_HANDLE_EMPTY_INPUT, SemanticTextFieldMapper.SEMANTIC_TEXT_HANDLE_EMPTY_INPUT,
TEST_RULE_RETRIEVER_WITH_INDICES_THAT_DONT_RETURN_RANK_DOCS, TEST_RULE_RETRIEVER_WITH_INDICES_THAT_DONT_RETURN_RANK_DOCS,
SEMANTIC_TEXT_SUPPORT_CHUNKING_CONFIG, SEMANTIC_TEXT_SUPPORT_CHUNKING_CONFIG,
SEMANTIC_TEXT_MATCH_ALL_HIGHLIGHTER SEMANTIC_TEXT_MATCH_ALL_HIGHLIGHTER,
SEMANTIC_TEXT_EXCLUDE_SUB_FIELDS_FROM_FIELD_CAPS
); );
} }
} }

View file

@ -134,6 +134,9 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFie
public static final NodeFeature SEMANTIC_TEXT_SKIP_INFERENCE_FIELDS = new NodeFeature("semantic_text.skip_inference_fields"); public static final NodeFeature SEMANTIC_TEXT_SKIP_INFERENCE_FIELDS = new NodeFeature("semantic_text.skip_inference_fields");
public static final NodeFeature SEMANTIC_TEXT_BIT_VECTOR_SUPPORT = new NodeFeature("semantic_text.bit_vector_support"); public static final NodeFeature SEMANTIC_TEXT_BIT_VECTOR_SUPPORT = new NodeFeature("semantic_text.bit_vector_support");
public static final NodeFeature SEMANTIC_TEXT_SUPPORT_CHUNKING_CONFIG = new NodeFeature("semantic_text.support_chunking_config"); public static final NodeFeature SEMANTIC_TEXT_SUPPORT_CHUNKING_CONFIG = new NodeFeature("semantic_text.support_chunking_config");
public static final NodeFeature SEMANTIC_TEXT_EXCLUDE_SUB_FIELDS_FROM_FIELD_CAPS = new NodeFeature(
"semantic_text.exclude_sub_fields_from_field_caps"
);
public static final String CONTENT_TYPE = "semantic_text"; public static final String CONTENT_TYPE = "semantic_text";
public static final String DEFAULT_ELSER_2_INFERENCE_ID = DEFAULT_ELSER_ID; public static final String DEFAULT_ELSER_2_INFERENCE_ID = DEFAULT_ELSER_ID;

View file

@ -359,3 +359,76 @@ setup:
index: test-always-include-inference-id-index index: test-always-include-inference-id-index
- exists: test-always-include-inference-id-index.mappings.properties.semantic_field.inference_id - exists: test-always-include-inference-id-index.mappings.properties.semantic_field.inference_id
---
"Field caps exclude chunks and embedding fields":
- requires:
cluster_features: "semantic_text.exclude_sub_fields_from_field_caps"
reason: field caps api exclude semantic_text subfields from 9.1.0 & 8.19.0
- do:
field_caps:
include_empty_fields: true
index: test-index
fields: "*"
- match: { indices: [ "test-index" ] }
- exists: fields.sparse_field
- exists: fields.dense_field
- not_exists: fields.sparse_field.inference.chunks.embeddings
- not_exists: fields.sparse_field.inference.chunks.offset
- not_exists: fields.sparse_field.inference.chunks
- not_exists: fields.sparse_field.inference
- not_exists: fields.dense_field.inference.chunks.embeddings
- not_exists: fields.dense_field.inference.chunks.offset
- not_exists: fields.dense_field.inference.chunks
- not_exists: fields.dense_field.inference
---
"Field caps does not exclude multi-fields under semantic_text":
- requires:
cluster_features: "semantic_text.exclude_sub_fields_from_field_caps"
reason: field caps api exclude semantic_text subfields from 9.1.0 & 8.19.0
- do:
indices.create:
index: test-multi-field-index
body:
settings:
index:
mapping:
semantic_text:
use_legacy_format: false
mappings:
properties:
sparse_field:
type: semantic_text
inference_id: sparse-inference-id
fields:
sparse_keyword_field:
type: keyword
dense_field:
type: semantic_text
inference_id: dense-inference-id
fields:
dense_keyword_field:
type: keyword
- do:
field_caps:
include_empty_fields: true
index: test-multi-field-index
fields: "*"
- match: { indices: [ "test-multi-field-index" ] }
- exists: fields.sparse_field
- exists: fields.dense_field
- exists: fields.sparse_field\.sparse_keyword_field
- exists: fields.dense_field\.dense_keyword_field
- not_exists: fields.sparse_field.inference.chunks.embeddings
- not_exists: fields.sparse_field.inference.chunks.offset
- not_exists: fields.sparse_field.inference.chunks
- not_exists: fields.sparse_field.inference
- not_exists: fields.dense_field.inference.chunks.embeddings
- not_exists: fields.dense_field.inference.chunks.offset
- not_exists: fields.dense_field.inference.chunks
- not_exists: fields.dense_field.inference

View file

@ -307,3 +307,26 @@ setup:
another_field: another_field:
type: keyword type: keyword
---
"Field caps exclude chunks embedding and text fields":
- requires:
cluster_features: "semantic_text.exclude_sub_fields_from_field_caps"
reason: field caps api exclude semantic_text subfields from 9.1.0 & 8.19.0
- do:
field_caps:
include_empty_fields: true
index: test-index
fields: "*"
- match: { indices: [ "test-index" ] }
- exists: fields.sparse_field
- exists: fields.dense_field
- not_exists: fields.sparse_field.inference.chunks.embeddings
- not_exists: fields.sparse_field.inference.chunks.text
- not_exists: fields.sparse_field.inference.chunks
- not_exists: fields.sparse_field.inference
- not_exists: fields.dense_field.inference.chunks.embeddings
- not_exists: fields.dense_field.inference.chunks.text
- not_exists: fields.dense_field.inference.chunks
- not_exists: fields.dense_field.inference