mirror of
https://github.com/elastic/elasticsearch.git
synced 2025-06-28 17:34:17 -04:00
Adding support to exclude semantic_text subfields (#127664)
* Adding support to exclude semantic_text subfields * Update docs/changelog/127664.yaml * Updating changelog file * remove duplicate test from yaml file * Adding support to exclude semantic_text subfields from mapper builders * Adding support for generic field types * refactoring to use builder and setting exclude value from semantic_text mapper * update in semantic_text mapper and fetcher to incorporate the support functionality * Fix code style issue * adding node feature for yaml tests * Adding more restrictive checks on yaml tests and few refactoring * Returns metadata fields from metadata mappers * returns all source fields for fieldcaps * gather all fields and iterate to process for fieldcaps api * revert back all changes from MappedFieldtype and subclasses * revert back exclude logic from semantic_text mapper * fix lint issues * fix lint issues * Adding runtime fields into fieldCaps * Fix linting issue * removing unused functions that used in previous implementation * fix multifield tests failure * getting alias fields for field caps * adding support for query time runtime fields * [CI] Auto commit changes from spotless * Fix empty mapping fieldCaps call * Address passthrough behavior for mappers * Fix SearchAsYoutype mapper failures * rename abstract method to have more meaningful name * Rename mapper function to match its functionality * Adding filtering for infernece subfields * revert back previous implementation changes * Adding yaml test for field caps not filtering multi-field * Fixing yaml test * Adding comment why .infernece filter is added --------- Co-authored-by: elasticsearchmachine <infra-root+elasticsearchmachine@elastic.co> Co-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com>
This commit is contained in:
parent
6370d600b0
commit
b50bb6b5fb
6 changed files with 117 additions and 1 deletions
5
docs/changelog/127664.yaml
Normal file
5
docs/changelog/127664.yaml
Normal file
|
@ -0,0 +1,5 @@
|
|||
pr: 127664
|
||||
summary: Exclude `semantic_text` subfields from field capabilities API
|
||||
area: "Mapping"
|
||||
type: enhancement
|
||||
issues: []
|
|
@ -9,6 +9,7 @@
|
|||
|
||||
package org.elasticsearch.action.fieldcaps;
|
||||
|
||||
import org.elasticsearch.cluster.metadata.InferenceFieldMetadata;
|
||||
import org.elasticsearch.cluster.metadata.MappingMetadata;
|
||||
import org.elasticsearch.core.Booleans;
|
||||
import org.elasticsearch.core.Nullable;
|
||||
|
@ -30,6 +31,7 @@ import org.elasticsearch.search.internal.ShardSearchRequest;
|
|||
import org.elasticsearch.tasks.CancellableTask;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
@ -256,6 +258,14 @@ class FieldCapabilitiesFetcher {
|
|||
Set<String> acceptedTypes = Set.of(fieldTypes);
|
||||
fcf = ft -> acceptedTypes.contains(ft.familyTypeName());
|
||||
}
|
||||
|
||||
// Exclude internal ".inference" subfields of semantic_text fields from the field capabilities response
|
||||
Collection<InferenceFieldMetadata> inferenceFields = context.getMappingLookup().inferenceFields().values();
|
||||
for (InferenceFieldMetadata inferenceField : inferenceFields) {
|
||||
Predicate<MappedFieldType> next = ft -> ft.name().startsWith(inferenceField.getName() + ".inference") == false;
|
||||
fcf = fcf == null ? next : fcf.and(next);
|
||||
}
|
||||
|
||||
for (String filter : filters) {
|
||||
if ("parent".equals(filter) || "-parent".equals(filter)) {
|
||||
continue;
|
||||
|
|
|
@ -15,6 +15,7 @@ import org.elasticsearch.xpack.inference.rank.textsimilarity.TextSimilarityRankR
|
|||
|
||||
import java.util.Set;
|
||||
|
||||
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.SEMANTIC_TEXT_EXCLUDE_SUB_FIELDS_FROM_FIELD_CAPS;
|
||||
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.SEMANTIC_TEXT_SUPPORT_CHUNKING_CONFIG;
|
||||
import static org.elasticsearch.xpack.inference.queries.SemanticKnnVectorQueryRewriteInterceptor.SEMANTIC_KNN_FILTER_FIX;
|
||||
import static org.elasticsearch.xpack.inference.queries.SemanticKnnVectorQueryRewriteInterceptor.SEMANTIC_KNN_VECTOR_QUERY_REWRITE_INTERCEPTION_SUPPORTED;
|
||||
|
@ -59,7 +60,8 @@ public class InferenceFeatures implements FeatureSpecification {
|
|||
SemanticTextFieldMapper.SEMANTIC_TEXT_HANDLE_EMPTY_INPUT,
|
||||
TEST_RULE_RETRIEVER_WITH_INDICES_THAT_DONT_RETURN_RANK_DOCS,
|
||||
SEMANTIC_TEXT_SUPPORT_CHUNKING_CONFIG,
|
||||
SEMANTIC_TEXT_MATCH_ALL_HIGHLIGHTER
|
||||
SEMANTIC_TEXT_MATCH_ALL_HIGHLIGHTER,
|
||||
SEMANTIC_TEXT_EXCLUDE_SUB_FIELDS_FROM_FIELD_CAPS
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -134,6 +134,9 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFie
|
|||
public static final NodeFeature SEMANTIC_TEXT_SKIP_INFERENCE_FIELDS = new NodeFeature("semantic_text.skip_inference_fields");
|
||||
public static final NodeFeature SEMANTIC_TEXT_BIT_VECTOR_SUPPORT = new NodeFeature("semantic_text.bit_vector_support");
|
||||
public static final NodeFeature SEMANTIC_TEXT_SUPPORT_CHUNKING_CONFIG = new NodeFeature("semantic_text.support_chunking_config");
|
||||
public static final NodeFeature SEMANTIC_TEXT_EXCLUDE_SUB_FIELDS_FROM_FIELD_CAPS = new NodeFeature(
|
||||
"semantic_text.exclude_sub_fields_from_field_caps"
|
||||
);
|
||||
|
||||
public static final String CONTENT_TYPE = "semantic_text";
|
||||
public static final String DEFAULT_ELSER_2_INFERENCE_ID = DEFAULT_ELSER_ID;
|
||||
|
|
|
@ -359,3 +359,76 @@ setup:
|
|||
index: test-always-include-inference-id-index
|
||||
|
||||
- exists: test-always-include-inference-id-index.mappings.properties.semantic_field.inference_id
|
||||
|
||||
---
"Field caps exclude chunks and embedding fields":
  - requires:
      cluster_features: "semantic_text.exclude_sub_fields_from_field_caps"
      reason: field caps api exclude semantic_text subfields from 9.1.0 & 8.19.0

  - do:
      field_caps:
        include_empty_fields: true
        index: test-index
        fields: "*"

  - match: { indices: [ "test-index" ] }
  - exists: fields.sparse_field
  - exists: fields.dense_field
  # Field caps keys are flat, dotted field names, so the dots inside a field name must be escaped.
  # Unescaped, the path would descend into the "sparse_field" / "dense_field" entry instead of
  # looking up the subfield key, and not_exists would pass even if the subfield were returned.
  - not_exists: fields.sparse_field\.inference\.chunks\.embeddings
  - not_exists: fields.sparse_field\.inference\.chunks\.offset
  - not_exists: fields.sparse_field\.inference\.chunks
  - not_exists: fields.sparse_field\.inference
  - not_exists: fields.dense_field\.inference\.chunks\.embeddings
  - not_exists: fields.dense_field\.inference\.chunks\.offset
  - not_exists: fields.dense_field\.inference\.chunks
  - not_exists: fields.dense_field\.inference
|
||||
|
||||
---
"Field caps does not exclude multi-fields under semantic_text":
  - requires:
      cluster_features: "semantic_text.exclude_sub_fields_from_field_caps"
      reason: field caps api exclude semantic_text subfields from 9.1.0 & 8.19.0

  # Each semantic_text field carries a keyword multi-field that must survive the ".inference" filtering.
  - do:
      indices.create:
        index: test-multi-field-index
        body:
          settings:
            index:
              mapping:
                semantic_text:
                  use_legacy_format: false
          mappings:
            properties:
              sparse_field:
                type: semantic_text
                inference_id: sparse-inference-id
                fields:
                  sparse_keyword_field:
                    type: keyword
              dense_field:
                type: semantic_text
                inference_id: dense-inference-id
                fields:
                  dense_keyword_field:
                    type: keyword

  - do:
      field_caps:
        include_empty_fields: true
        index: test-multi-field-index
        fields: "*"

  - match: { indices: [ "test-multi-field-index" ] }
  - exists: fields.sparse_field
  - exists: fields.dense_field
  - exists: fields.sparse_field\.sparse_keyword_field
  - exists: fields.dense_field\.dense_keyword_field
  # Dots inside a field name are escaped here just like in the exists checks above. Unescaped,
  # the path would descend into the parent field's entry and not_exists would pass vacuously.
  - not_exists: fields.sparse_field\.inference\.chunks\.embeddings
  - not_exists: fields.sparse_field\.inference\.chunks\.offset
  - not_exists: fields.sparse_field\.inference\.chunks
  - not_exists: fields.sparse_field\.inference
  - not_exists: fields.dense_field\.inference\.chunks\.embeddings
  - not_exists: fields.dense_field\.inference\.chunks\.offset
  - not_exists: fields.dense_field\.inference\.chunks
  - not_exists: fields.dense_field\.inference
|
||||
|
|
|
@ -307,3 +307,26 @@ setup:
|
|||
another_field:
|
||||
type: keyword
|
||||
|
||||
---
"Field caps exclude chunks embedding and text fields":
  - requires:
      cluster_features: "semantic_text.exclude_sub_fields_from_field_caps"
      reason: field caps api exclude semantic_text subfields from 9.1.0 & 8.19.0

  - do:
      field_caps:
        include_empty_fields: true
        index: test-index
        fields: "*"

  - match: { indices: [ "test-index" ] }
  - exists: fields.sparse_field
  - exists: fields.dense_field
  # Field caps keys are flat, dotted field names, so the dots inside a field name must be escaped.
  # Unescaped, the path would descend into the "sparse_field" / "dense_field" entry instead of
  # looking up the subfield key, and not_exists would pass even if the subfield were returned.
  - not_exists: fields.sparse_field\.inference\.chunks\.embeddings
  - not_exists: fields.sparse_field\.inference\.chunks\.text
  - not_exists: fields.sparse_field\.inference\.chunks
  - not_exists: fields.sparse_field\.inference
  - not_exists: fields.dense_field\.inference\.chunks\.embeddings
  - not_exists: fields.dense_field\.inference\.chunks\.text
  - not_exists: fields.dense_field\.inference\.chunks
  - not_exists: fields.dense_field\.inference
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue