Update sparse_vector field mapping to include default setting for token pruning (#129089)

* Initial checkin of refactored index_options code

* [CI] Auto commit changes from spotless

* initial unit testing

* complete unit tests; add yaml tests

* [CI] Auto commit changes from spotless

* register test feature for sparse vector

* Update docs/changelog/129089.yaml

* update changelog

* add docs

* explicit set default index_options if null

* [CI] Auto commit changes from spotless

* update yaml tests; update docs

* fix yaml tests

* re-add auth for teardown

* only serialize index options if not default

* [CI] Auto commit changes from spotless

* serialization refactor; pass index version around

* [CI] Auto commit changes from spotless

* fix transport versions merge

* fix up docs

* [CI] Auto commit changes from spotless

* fix docs; add include_defaults unit and yaml test

* [CI] Auto commit changes from spotless

* override getIndexReaderManager for SemanticQueryBuilderTests

* [CI] Auto commit changes from spotless

* clean up mapper/builder/tests; index version in type

still need to refactor / clean YAML tests

* [CI] Auto commit changes from spotless

* cleanups to mapper tests for clarity

* [CI] Auto commit changes from spotless

* move feature into mappers; fix yaml tests

* cleanups; add comments; remove redundant test

* [CI] Auto commit changes from spotless

* escape more periods in the YAML tests

* cleanup mapper and type tests

* [CI] Auto commit changes from spotless

* rename mapping for previous index test

* set explicit number of shards for yaml test

---------

Co-authored-by: elasticsearchmachine <infra-root+elasticsearchmachine@elastic.co>
Co-authored-by: Kathleen DeRusso <kathleen.derusso@elastic.co>
This commit is contained in:
Mark J. Hoy 2025-06-23 18:21:32 -04:00 committed by GitHub
parent a324853d43
commit a671505c8a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 2408 additions and 50 deletions

View file

@ -203,7 +203,7 @@ public class TransportVersions {
public static final TransportVersion ML_INFERENCE_CUSTOM_SERVICE_INPUT_TYPE_8_19 = def(8_841_0_55);
public static final TransportVersion RANDOM_SAMPLER_QUERY_BUILDER_8_19 = def(8_841_0_56);
public static final TransportVersion ML_INFERENCE_SAGEMAKER_ELASTIC_8_19 = def(8_841_0_57);
public static final TransportVersion SPARSE_VECTOR_FIELD_PRUNING_OPTIONS_8_19 = def(8_841_0_58);
public static final TransportVersion V_9_0_0 = def(9_000_0_09);
public static final TransportVersion INITIAL_ELASTICSEARCH_9_0_1 = def(9_000_0_10);
public static final TransportVersion INITIAL_ELASTICSEARCH_9_0_2 = def(9_000_0_11);
@ -313,6 +313,7 @@ public class TransportVersions {
public static final TransportVersion STREAMS_LOGS_SUPPORT = def(9_104_0_00);
public static final TransportVersion ML_INFERENCE_CUSTOM_SERVICE_INPUT_TYPE = def(9_105_0_00);
public static final TransportVersion ML_INFERENCE_SAGEMAKER_ELASTIC = def(9_106_0_00);
public static final TransportVersion SPARSE_VECTOR_FIELD_PRUNING_OPTIONS = def(9_107_0_00);
/*
* STOP! READ THIS FIRST! No, really,

View file

@ -144,6 +144,7 @@ public class IndexVersions {
public static final IndexVersion INDEX_INT_SORT_INT_TYPE_8_19 = def(8_532_0_00, Version.LUCENE_9_12_1);
public static final IndexVersion MAPPER_TEXT_MATCH_ONLY_MULTI_FIELDS_DEFAULT_NOT_STORED_8_19 = def(8_533_0_00, Version.LUCENE_9_12_1);
public static final IndexVersion UPGRADE_TO_LUCENE_9_12_2 = def(8_534_0_00, Version.LUCENE_9_12_2);
public static final IndexVersion SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT_BACKPORT_8_X = def(8_535_0_00, Version.LUCENE_9_12_2);
public static final IndexVersion UPGRADE_TO_LUCENE_10_0_0 = def(9_000_0_00, Version.LUCENE_10_0_0);
public static final IndexVersion LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT = def(9_001_0_00, Version.LUCENE_10_0_0);
public static final IndexVersion TIME_BASED_K_ORDERED_DOC_ID = def(9_002_0_00, Version.LUCENE_10_0_0);
@ -175,6 +176,7 @@ public class IndexVersions {
public static final IndexVersion INDEX_INT_SORT_INT_TYPE = def(9_028_0_00, Version.LUCENE_10_2_1);
public static final IndexVersion MAPPER_TEXT_MATCH_ONLY_MULTI_FIELDS_DEFAULT_NOT_STORED = def(9_029_0_00, Version.LUCENE_10_2_1);
public static final IndexVersion UPGRADE_TO_LUCENE_10_2_2 = def(9_030_0_00, Version.LUCENE_10_2_2);
public static final IndexVersion SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT = def(9_031_0_00, Version.LUCENE_10_2_2);
/*
* STOP! READ THIS FIRST! No, really,

View file

@ -17,6 +17,7 @@ import java.util.Set;
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.RESCORE_VECTOR_QUANTIZED_VECTOR_MAPPING;
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING;
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.USE_DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ;
import static org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper.SPARSE_VECTOR_INDEX_OPTIONS_FEATURE;
/**
* Spec for mapper-related features.
@ -74,7 +75,8 @@ public class MapperFeatures implements FeatureSpecification {
USE_DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ,
IVF_FORMAT_CLUSTER_FEATURE,
IVF_NESTED_SUPPORT,
SEARCH_LOAD_PER_SHARD
SEARCH_LOAD_PER_SHARD,
SPARSE_VECTOR_INDEX_OPTIONS_FEATURE
);
}
}

View file

@ -22,6 +22,9 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.logging.DeprecationCategory;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.features.NodeFeature;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.IndexVersions;
import org.elasticsearch.index.analysis.NamedAnalyzer;
@ -31,6 +34,7 @@ import org.elasticsearch.index.mapper.DocumentParserContext;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MapperBuilderContext;
import org.elasticsearch.index.mapper.MappingParserContext;
import org.elasticsearch.index.mapper.SourceLoader;
import org.elasticsearch.index.mapper.SourceValueFetcher;
import org.elasticsearch.index.mapper.TextSearchInfo;
@ -40,17 +44,27 @@ import org.elasticsearch.inference.WeightedToken;
import org.elasticsearch.inference.WeightedTokensUtils;
import org.elasticsearch.search.fetch.StoredFieldsSpec;
import org.elasticsearch.search.lookup.Source;
import org.elasticsearch.xcontent.ConstructingObjectParser;
import org.elasticsearch.xcontent.DeprecationHandler;
import org.elasticsearch.xcontent.NamedXContentRegistry;
import org.elasticsearch.xcontent.ParseField;
import org.elasticsearch.xcontent.ToXContent;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.xcontent.XContentParser.Token;
import org.elasticsearch.xcontent.XContentType;
import org.elasticsearch.xcontent.support.MapXContentParser;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.stream.Stream;
import static org.elasticsearch.index.query.AbstractQueryBuilder.DEFAULT_BOOST;
import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg;
/**
* A {@link FieldMapper} that exposes Lucene's {@link FeatureField} as a sparse
@ -59,6 +73,7 @@ import static org.elasticsearch.index.query.AbstractQueryBuilder.DEFAULT_BOOST;
public class SparseVectorFieldMapper extends FieldMapper {
public static final String CONTENT_TYPE = "sparse_vector";
public static final String SPARSE_VECTOR_INDEX_OPTIONS = "index_options";
static final String ERROR_MESSAGE_7X = "[sparse_vector] field type in old 7.x indices is allowed to "
+ "contain [sparse_vector] fields, but they cannot be indexed or searched.";
@ -67,17 +82,34 @@ public class SparseVectorFieldMapper extends FieldMapper {
static final IndexVersion NEW_SPARSE_VECTOR_INDEX_VERSION = IndexVersions.NEW_SPARSE_VECTOR;
static final IndexVersion SPARSE_VECTOR_IN_FIELD_NAMES_INDEX_VERSION = IndexVersions.SPARSE_VECTOR_IN_FIELD_NAMES_SUPPORT;
static final IndexVersion SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION = IndexVersions.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT;
static final IndexVersion SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION_8_X =
IndexVersions.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT_BACKPORT_8_X;
public static final NodeFeature SPARSE_VECTOR_INDEX_OPTIONS_FEATURE = new NodeFeature("sparse_vector.index_options_supported");
// Casts a generic FieldMapper to SparseVectorFieldMapper; the builder's parameter
// initializers use it to read values from an existing mapper of this type.
private static SparseVectorFieldMapper toType(FieldMapper in) {
return (SparseVectorFieldMapper) in;
}
public static class Builder extends FieldMapper.Builder {
private final IndexVersion indexVersionCreated;
private final Parameter<Boolean> stored = Parameter.storeParam(m -> toType(m).fieldType().isStored(), false);
private final Parameter<Map<String, String>> meta = Parameter.metaParam();
private final Parameter<IndexOptions> indexOptions = new Parameter<>(
SPARSE_VECTOR_INDEX_OPTIONS,
true,
() -> null,
(n, c, o) -> parseIndexOptions(c, o),
m -> toType(m).fieldType().indexOptions,
XContentBuilder::field,
Objects::toString
).acceptsNull().setSerializerCheck(this::indexOptionsSerializerCheck);
public Builder(String name) {
public Builder(String name, IndexVersion indexVersionCreated) {
super(name);
this.indexVersionCreated = indexVersionCreated;
}
public Builder setStored(boolean value) {
@ -87,17 +119,74 @@ public class SparseVectorFieldMapper extends FieldMapper {
@Override
protected Parameter<?>[] getParameters() {
return new Parameter<?>[] { stored, meta };
return new Parameter<?>[] { stored, meta, indexOptions };
}
@Override
public SparseVectorFieldMapper build(MapperBuilderContext context) {
IndexOptions builderIndexOptions = indexOptions.getValue();
if (builderIndexOptions == null) {
builderIndexOptions = getDefaultIndexOptions(indexVersionCreated);
}
return new SparseVectorFieldMapper(
leafName(),
new SparseVectorFieldType(context.buildFullName(leafName()), stored.getValue(), meta.getValue()),
new SparseVectorFieldType(
indexVersionCreated,
context.buildFullName(leafName()),
stored.getValue(),
meta.getValue(),
builderIndexOptions
),
builderParams(this, context)
);
}
/**
 * Returns the index options to apply when the mapping does not configure
 * {@code index_options} explicitly.
 *
 * @param indexVersion the version the index was created with
 * @return {@link IndexOptions#DEFAULT_PRUNING_INDEX_OPTIONS} when the index version supports
 *         default pruning, otherwise {@code null}
 */
private IndexOptions getDefaultIndexOptions(IndexVersion indexVersion) {
    // Delegate to the shared version gate (also used by IndexOptions.isDefaultOptions) so the
    // "what is the default" and "is this the default" answers can never drift apart.
    return indexVersionSupportsDefaultPruningConfig(indexVersion) ? IndexOptions.DEFAULT_PRUNING_INDEX_OPTIONS : null;
}
/**
 * Decides whether {@code index_options} is written when the mapping is serialized:
 * always when defaults are requested, otherwise only when the value differs from the
 * default for the index version this builder was created with.
 */
private boolean indexOptionsSerializerCheck(boolean includeDefaults, boolean isConfigured, IndexOptions value) {
    if (includeDefaults) {
        return true;
    }
    final boolean matchesDefault = IndexOptions.isDefaultOptions(value, indexVersionCreated);
    return matchesDefault == false;
}
}
// Returns the index options held by this mapper's field type (may be null).
public IndexOptions getIndexOptions() {
return fieldType().getIndexOptions();
}
// Parser for the [index_options] mapping object. The constructor arguments follow the
// declaration order in the static block below: args[0] is [prune], args[1] is [pruning_config].
private static final ConstructingObjectParser<IndexOptions, Void> INDEX_OPTIONS_PARSER = new ConstructingObjectParser<>(
SPARSE_VECTOR_INDEX_OPTIONS,
args -> new IndexOptions((Boolean) args[0], (TokenPruningConfig) args[1])
);
static {
// Both fields are optional; an absent field reaches the IndexOptions constructor as null.
INDEX_OPTIONS_PARSER.declareBoolean(optionalConstructorArg(), IndexOptions.PRUNE_FIELD_NAME);
INDEX_OPTIONS_PARSER.declareObject(optionalConstructorArg(), TokenPruningConfig.PARSER, IndexOptions.PRUNING_CONFIG_FIELD_NAME);
}
/**
 * Parses the raw {@code index_options} mapping node into an {@link IndexOptions} instance.
 *
 * @param context  the mapping parser context (not used by the parsing itself)
 * @param propNode the raw mapping value; {@code null} means the option was explicitly set to null
 * @return the parsed options, or {@code null} when {@code propNode} is {@code null}
 * @throws UncheckedIOException if reading from the in-memory parser fails
 */
private static SparseVectorFieldMapper.IndexOptions parseIndexOptions(MappingParserContext context, Object propNode) {
    if (propNode == null) {
        return null;
    }

    Map<String, Object> indexOptionsMap = XContentMapValues.nodeMapValue(propNode, SPARSE_VECTOR_INDEX_OPTIONS);

    // try-with-resources so the parser is always closed, including when parsing throws
    try (
        XContentParser parser = new MapXContentParser(
            NamedXContentRegistry.EMPTY,
            DeprecationHandler.IGNORE_DEPRECATIONS,
            indexOptionsMap,
            XContentType.JSON
        )
    ) {
        return INDEX_OPTIONS_PARSER.parse(parser, null);
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
}
public static final TypeParser PARSER = new TypeParser((n, c) -> {
@ -107,13 +196,31 @@ public class SparseVectorFieldMapper extends FieldMapper {
throw new IllegalArgumentException(ERROR_MESSAGE_8X);
}
return new Builder(n);
return new Builder(n, c.indexVersionCreated());
}, notInMultiFields(CONTENT_TYPE));
public static final class SparseVectorFieldType extends MappedFieldType {
private final IndexVersion indexVersionCreated;
private final IndexOptions indexOptions;
public SparseVectorFieldType(String name, boolean isStored, Map<String, String> meta) {
public SparseVectorFieldType(IndexVersion indexVersionCreated, String name, boolean isStored, Map<String, String> meta) {
this(indexVersionCreated, name, isStored, meta, null);
}
/**
 * Creates a sparse_vector field type.
 *
 * @param indexVersionCreated index version stored on this field type
 * @param name                field name passed to the parent type
 * @param isStored            stored flag passed to the parent type
 * @param meta                field metadata passed to the parent type
 * @param indexOptions        index options for this field; may be {@code null}
 */
public SparseVectorFieldType(
IndexVersion indexVersionCreated,
String name,
boolean isStored,
Map<String, String> meta,
@Nullable SparseVectorFieldMapper.IndexOptions indexOptions
) {
super(name, true, isStored, false, TextSearchInfo.SIMPLE_MATCH_ONLY, meta);
this.indexVersionCreated = indexVersionCreated;
this.indexOptions = indexOptions;
}
// Returns the index options this field type was constructed with (may be null).
public IndexOptions getIndexOptions() {
return indexOptions;
}
@Override
@ -160,11 +267,30 @@ public class SparseVectorFieldMapper extends FieldMapper {
SearchExecutionContext context,
String fieldName,
List<WeightedToken> queryVectors,
boolean shouldPruneTokens,
TokenPruningConfig tokenPruningConfig
Boolean shouldPruneTokensFromQuery,
TokenPruningConfig tokenPruningConfigFromQuery
) throws IOException {
return (shouldPruneTokens)
? WeightedTokensUtils.queryBuilderWithPrunedTokens(fieldName, tokenPruningConfig, queryVectors, this, context)
Boolean shouldPruneTokens = shouldPruneTokensFromQuery;
TokenPruningConfig tokenPruningConfig = tokenPruningConfigFromQuery;
if (indexOptions != null) {
if (shouldPruneTokens == null && indexOptions.prune != null) {
shouldPruneTokens = indexOptions.prune;
}
if (tokenPruningConfig == null && indexOptions.pruningConfig != null) {
tokenPruningConfig = indexOptions.pruningConfig;
}
}
return (shouldPruneTokens != null && shouldPruneTokens)
? WeightedTokensUtils.queryBuilderWithPrunedTokens(
fieldName,
tokenPruningConfig == null ? new TokenPruningConfig() : tokenPruningConfig,
queryVectors,
this,
context
)
: WeightedTokensUtils.queryBuilderWithAllTokens(fieldName, queryVectors, this, context);
}
@ -195,7 +321,7 @@ public class SparseVectorFieldMapper extends FieldMapper {
@Override
public FieldMapper.Builder getMergeBuilder() {
return new Builder(leafName()).init(this);
return new Builder(leafName(), this.fieldType().indexVersionCreated).init(this);
}
@Override
@ -273,6 +399,12 @@ public class SparseVectorFieldMapper extends FieldMapper {
return CONTENT_TYPE;
}
/**
 * Tells whether indices created with the given version get token pruning by default.
 * Per the version constants this is 9.1.0+ or the 8.19.0+ backport range.
 */
private static boolean indexVersionSupportsDefaultPruningConfig(IndexVersion indexVersion) {
    if (indexVersion.onOrAfter(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION)) {
        return true;
    }
    // otherwise only the 8.x backport window (bounded by the Lucene 10 upgrade version) qualifies
    return indexVersion.between(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION_8_X, IndexVersions.UPGRADE_TO_LUCENE_10_0_0);
}
private static class SparseVectorValueFetcher implements ValueFetcher {
private final String fieldName;
private TermVectors termVectors;
@ -383,4 +515,79 @@ public class SparseVectorFieldMapper extends FieldMapper {
}
}
/**
 * Mapping-level {@code index_options} of a sparse_vector field: an optional {@code prune}
 * flag and an optional {@code pruning_config}. A pruning configuration is only accepted
 * together with {@code prune: true}.
 */
public static class IndexOptions implements ToXContent {
    public static final ParseField PRUNE_FIELD_NAME = new ParseField("prune");
    public static final ParseField PRUNING_CONFIG_FIELD_NAME = new ParseField("pruning_config");
    public static final IndexOptions DEFAULT_PRUNING_INDEX_OPTIONS = new IndexOptions(true, new TokenPruningConfig());

    final Boolean prune;
    final TokenPruningConfig pruningConfig;

    /**
     * @param prune         whether tokens should be pruned; may be {@code null}
     * @param pruningConfig pruning settings; may be {@code null}, and must be {@code null}
     *                      unless {@code prune} is {@code true}
     * @throws IllegalArgumentException if a pruning config is given while pruning is not enabled
     */
    IndexOptions(@Nullable Boolean prune, @Nullable TokenPruningConfig pruningConfig) {
        final boolean pruningEnabled = Boolean.TRUE.equals(prune);
        if (pruningEnabled == false && pruningConfig != null) {
            final String message = "["
                + SPARSE_VECTOR_INDEX_OPTIONS
                + "] field ["
                + PRUNING_CONFIG_FIELD_NAME.getPreferredName()
                + "] should only be set if ["
                + PRUNE_FIELD_NAME.getPreferredName()
                + "] is set to true";
            throw new IllegalArgumentException(message);
        }

        this.prune = prune;
        this.pruningConfig = pruningConfig;
    }

    /**
     * Checks whether the given options equal the default for the given index version:
     * {@link #DEFAULT_PRUNING_INDEX_OPTIONS} where default pruning is supported, {@code null} otherwise.
     */
    public static boolean isDefaultOptions(IndexOptions indexOptions, IndexVersion indexVersion) {
        if (indexVersionSupportsDefaultPruningConfig(indexVersion)) {
            return Objects.equals(indexOptions, DEFAULT_PRUNING_INDEX_OPTIONS);
        }
        return indexOptions == null;
    }

    public Boolean getPrune() {
        return prune;
    }

    public TokenPruningConfig getPruningConfig() {
        return pruningConfig;
    }

    /** Writes an object containing only the fields that are set. */
    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        builder.startObject();
        if (prune != null) {
            builder.field(PRUNE_FIELD_NAME.getPreferredName(), prune);
        }
        if (pruningConfig != null) {
            builder.field(PRUNING_CONFIG_FIELD_NAME.getPreferredName(), pruningConfig);
        }
        return builder.endObject();
    }

    @Override
    public final boolean equals(Object other) {
        if (this == other) {
            return true;
        }
        if (other != null && other.getClass() == getClass()) {
            final IndexOptions that = (IndexOptions) other;
            return Objects.equals(prune, that.prune) && Objects.equals(pruningConfig, that.pruningConfig);
        }
        return false;
    }

    @Override
    public final int hashCode() {
        return Objects.hash(prune, pruningConfig);
    }
}
}

View file

@ -14,16 +14,25 @@ import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.xcontent.ConstructingObjectParser;
import org.elasticsearch.xcontent.DeprecationHandler;
import org.elasticsearch.xcontent.NamedXContentRegistry;
import org.elasticsearch.xcontent.ParseField;
import org.elasticsearch.xcontent.ToXContentObject;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.xcontent.XContentType;
import org.elasticsearch.xcontent.support.MapXContentParser;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg;
public class TokenPruningConfig implements Writeable, ToXContentObject {
public static final String PRUNING_CONFIG_FIELD = "pruning_config";
public static final ParseField TOKENS_FREQ_RATIO_THRESHOLD = new ParseField("tokens_freq_ratio_threshold");
@ -176,4 +185,38 @@ public class TokenPruningConfig implements Writeable, ToXContentObject {
}
return new TokenPruningConfig(ratioThreshold, weightThreshold, onlyScorePrunedTokens);
}
// Parser for a [pruning_config] object. Constructor arguments follow the declaration order
// in the static block below: frequency-ratio threshold, weight threshold, only-score-pruned flag.
public static final ConstructingObjectParser<TokenPruningConfig, Void> PARSER = new ConstructingObjectParser<>(
PRUNING_CONFIG_FIELD,
args -> new TokenPruningConfig(
// absent thresholds fall back to the class defaults
args[0] == null ? DEFAULT_TOKENS_FREQ_RATIO_THRESHOLD : (Float) args[0],
args[1] == null ? DEFAULT_TOKENS_WEIGHT_THRESHOLD : (Float) args[1],
// an absent flag is treated as false
args[2] != null && (Boolean) args[2]
)
);
static {
PARSER.declareFloat(optionalConstructorArg(), TOKENS_FREQ_RATIO_THRESHOLD);
PARSER.declareFloat(optionalConstructorArg(), TOKENS_WEIGHT_THRESHOLD);
PARSER.declareBoolean(optionalConstructorArg(), ONLY_SCORE_PRUNED_TOKENS_FIELD);
}
/**
 * Builds a {@link TokenPruningConfig} from an already-parsed map representation.
 *
 * @param pruningConfigMap the pruning configuration as a map; may be {@code null}
 * @return the parsed configuration, or {@code null} when the map is {@code null}
 * @throws UncheckedIOException if reading from the in-memory parser fails
 */
public static TokenPruningConfig parseFromMap(Map<String, Object> pruningConfigMap) {
    if (pruningConfigMap == null) {
        return null;
    }

    // try-with-resources so the parser is always closed, including when parsing throws
    try (
        XContentParser parser = new MapXContentParser(
            NamedXContentRegistry.EMPTY,
            DeprecationHandler.IGNORE_DEPRECATIONS,
            pruningConfigMap,
            XContentType.JSON
        )
    ) {
        return PARSER.parse(parser, null);
    } catch (IOException ioEx) {
        throw new UncheckedIOException(ioEx);
    }
}
}

View file

@ -14,11 +14,17 @@ import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.core.CheckedConsumer;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.IndexVersions;
import org.elasticsearch.index.mapper.DocumentMapper;
@ -28,20 +34,32 @@ import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.MapperTestCase;
import org.elasticsearch.index.mapper.ParsedDocument;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.inference.WeightedToken;
import org.elasticsearch.search.lookup.Source;
import org.elasticsearch.search.vectors.SparseVectorQueryWrapper;
import org.elasticsearch.test.index.IndexVersionUtils;
import org.elasticsearch.xcontent.ToXContent;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentParseException;
import org.elasticsearch.xcontent.XContentType;
import org.elasticsearch.xcontent.json.JsonXContent;
import org.hamcrest.Matchers;
import org.junit.AssumptionViolatedException;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import static org.elasticsearch.index.IndexVersions.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT;
import static org.elasticsearch.index.IndexVersions.UPGRADE_TO_LUCENE_10_0_0;
import static org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper.NEW_SPARSE_VECTOR_INDEX_VERSION;
import static org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper.PREVIOUS_SPARSE_VECTOR_INDEX_VERSION;
import static org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertToXContentEquivalent;
import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
@ -67,6 +85,98 @@ public class SparseVectorFieldMapperTests extends MapperTestCase {
b.field("type", "sparse_vector");
}
/**
 * Writes the sparse_vector mapping with every parameter spelled out and
 * {@code index_options} explicitly null.
 */
protected void minimalFieldMappingPreviousIndexDefaultsIncluded(XContentBuilder b) throws IOException {
    b.field("type", "sparse_vector").field("store", false);
    b.startObject("meta").endObject();
    b.field("index_options", (Object) null);
}
/**
 * Writes the sparse_vector mapping with every parameter spelled out, using pruning enabled
 * and the default thresholds from {@link TokenPruningConfig} for {@code index_options}.
 */
protected void minimalMappingWithExplicitDefaults(XContentBuilder b) throws IOException {
    b.field("type", "sparse_vector").field("store", false);
    b.startObject("meta").endObject();
    b.startObject("index_options")
        .field("prune", true)
        .startObject("pruning_config")
        .field("tokens_freq_ratio_threshold", TokenPruningConfig.DEFAULT_TOKENS_FREQ_RATIO_THRESHOLD)
        .field("tokens_weight_threshold", TokenPruningConfig.DEFAULT_TOKENS_WEIGHT_THRESHOLD)
        .endObject()
        .endObject();
}
/** Writes a sparse_vector mapping with pruning enabled and both thresholds set explicitly. */
protected void minimalMappingWithExplicitIndexOptions(XContentBuilder b) throws IOException {
    b.field("type", "sparse_vector");
    b.startObject("index_options")
        .field("prune", true)
        .startObject("pruning_config")
        .field("tokens_freq_ratio_threshold", 3.0f)
        .field("tokens_weight_threshold", 0.5f)
        .endObject()
        .endObject();
}
/**
 * Writes a sparse_vector mapping with pruning enabled, a custom frequency-ratio threshold
 * and the default weight threshold.
 */
protected void serializedMappingWithSomeIndexOptions(XContentBuilder b) throws IOException {
    b.field("type", "sparse_vector");
    b.startObject("index_options")
        .field("prune", true)
        .startObject("pruning_config")
        .field("tokens_freq_ratio_threshold", 3.0f)
        .field("tokens_weight_threshold", TokenPruningConfig.DEFAULT_TOKENS_WEIGHT_THRESHOLD)
        .endObject()
        .endObject();
}
/** Writes a sparse_vector mapping with pruning enabled and only the frequency-ratio threshold set. */
protected void minimalMappingWithSomeExplicitIndexOptions(XContentBuilder b) throws IOException {
    b.field("type", "sparse_vector");
    b.startObject("index_options")
        .field("prune", true)
        .startObject("pruning_config")
        .field("tokens_freq_ratio_threshold", 3.0f)
        .endObject()
        .endObject();
}
/** Writes a sparse_vector mapping whose index_options contain only {@code prune: true}. */
protected void mappingWithIndexOptionsOnlyPruneTrue(XContentBuilder b) throws IOException {
    b.field("type", "sparse_vector");
    b.startObject("index_options").field("prune", true).endObject();
}
/** Writes a sparse_vector mapping whose index_options contain only {@code prune: false}. */
protected void mappingWithIndexOptionsPruneFalse(XContentBuilder b) throws IOException {
    b.field("type", "sparse_vector");
    b.startObject("index_options").field("prune", false).endObject();
}
@Override
protected boolean supportsStoredFields() {
return false;
@ -120,6 +230,84 @@ public class SparseVectorFieldMapperTests extends MapperTestCase {
assertTrue(freq1 < freq2);
}
/**
 * On a current index version the minimal mapping serializes with explicit default
 * index options when defaults are requested, and as the bare minimal mapping otherwise.
 */
public void testDefaultsWithAndWithoutIncludeDefaults() throws Exception {
    // include_defaults: the mapper should spell out the default index_options
    XContentBuilder expectedWithDefaults = JsonXContent.contentBuilder().startObject();
    expectedWithDefaults.startObject("field");
    minimalMappingWithExplicitDefaults(expectedWithDefaults);
    expectedWithDefaults.endObject().endObject();

    XContentBuilder actualWithDefaults = JsonXContent.contentBuilder().startObject();
    createMapperService(fieldMapping(this::minimalMapping)).mappingLookup()
        .getMapper("field")
        .toXContent(actualWithDefaults, INCLUDE_DEFAULTS);
    actualWithDefaults.endObject();

    assertToXContentEquivalent(BytesReference.bytes(expectedWithDefaults), BytesReference.bytes(actualWithDefaults), XContentType.JSON);

    // no include_defaults: only the minimal mapping is serialized
    XContentBuilder expectedMinimal = JsonXContent.contentBuilder().startObject();
    expectedMinimal.startObject("field");
    minimalMapping(expectedMinimal);
    expectedMinimal.endObject().endObject();

    XContentBuilder actualMinimal = JsonXContent.contentBuilder().startObject();
    createMapperService(fieldMapping(this::minimalMapping)).mappingLookup()
        .getMapper("field")
        .toXContent(actualMinimal, ToXContent.EMPTY_PARAMS);
    actualMinimal.endObject();

    assertToXContentEquivalent(BytesReference.bytes(expectedMinimal), BytesReference.bytes(actualMinimal), XContentType.JSON);
}
/**
 * On an index version before default pruning support, the minimal mapping serializes with
 * a null index_options when defaults are requested, and as the bare minimal mapping otherwise.
 */
public void testDefaultsWithAndWithoutIncludeDefaultsOlderIndexVersion() throws Exception {
    final IndexVersion olderVersion = IndexVersionUtils.randomVersionBetween(
        random(),
        UPGRADE_TO_LUCENE_10_0_0,
        IndexVersionUtils.getPreviousVersion(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION)
    );

    // include_defaults: index_options is present but null for these versions
    XContentBuilder expectedWithDefaults = JsonXContent.contentBuilder().startObject();
    expectedWithDefaults.startObject("field");
    minimalFieldMappingPreviousIndexDefaultsIncluded(expectedWithDefaults);
    expectedWithDefaults.endObject().endObject();

    XContentBuilder actualWithDefaults = JsonXContent.contentBuilder().startObject();
    createMapperService(olderVersion, fieldMapping(this::minimalMapping)).mappingLookup()
        .getMapper("field")
        .toXContent(actualWithDefaults, INCLUDE_DEFAULTS);
    actualWithDefaults.endObject();

    assertToXContentEquivalent(BytesReference.bytes(expectedWithDefaults), BytesReference.bytes(actualWithDefaults), XContentType.JSON);

    // no include_defaults: only the minimal mapping is serialized
    XContentBuilder expectedMinimal = JsonXContent.contentBuilder().startObject();
    expectedMinimal.startObject("field");
    minimalMapping(expectedMinimal);
    expectedMinimal.endObject().endObject();

    XContentBuilder actualMinimal = JsonXContent.contentBuilder().startObject();
    createMapperService(olderVersion, fieldMapping(this::minimalMapping)).mappingLookup()
        .getMapper("field")
        .toXContent(actualMinimal, ToXContent.EMPTY_PARAMS);
    actualMinimal.endObject();

    assertToXContentEquivalent(BytesReference.bytes(expectedMinimal), BytesReference.bytes(actualMinimal), XContentType.JSON);
}
/** Checks how mappings with explicitly configured index_options are serialized back. */
public void testMappingWithExplicitIndexOptions() throws Exception {
    // fully explicit pruning config round-trips unchanged
    DocumentMapper fullyExplicit = createDocumentMapper(fieldMapping(this::minimalMappingWithExplicitIndexOptions));
    String fullyExplicitExpected = Strings.toString(fieldMapping(this::minimalMappingWithExplicitIndexOptions));
    assertEquals(fullyExplicitExpected, fullyExplicit.mappingSource().toString());

    // prune: false round-trips unchanged
    DocumentMapper pruneDisabled = createDocumentMapper(fieldMapping(this::mappingWithIndexOptionsPruneFalse));
    String pruneDisabledExpected = Strings.toString(fieldMapping(this::mappingWithIndexOptionsPruneFalse));
    assertEquals(pruneDisabledExpected, pruneDisabled.mappingSource().toString());

    // a partially specified pruning_config is serialized with the missing threshold filled in
    DocumentMapper partialConfig = createDocumentMapper(fieldMapping(this::minimalMappingWithSomeExplicitIndexOptions));
    String partialConfigExpected = Strings.toString(fieldMapping(this::serializedMappingWithSomeIndexOptions));
    assertEquals(partialConfigExpected, partialConfig.mappingSource().toString());

    // prune: true without a pruning_config round-trips unchanged
    DocumentMapper pruneOnly = createDocumentMapper(fieldMapping(this::mappingWithIndexOptionsOnlyPruneTrue));
    String pruneOnlyExpected = Strings.toString(fieldMapping(this::mappingWithIndexOptionsOnlyPruneTrue));
    assertEquals(pruneOnlyExpected, pruneOnly.mappingSource().toString());
}
public void testDotInFieldName() throws Exception {
DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping));
ParsedDocument parsedDocument = mapper.parse(source(b -> b.field("field", Map.of("foo.bar", 10, "foobar", 20))));
@ -306,7 +494,7 @@ public class SparseVectorFieldMapperTests extends MapperTestCase {
return NEW_SPARSE_VECTOR_INDEX_VERSION;
}
public void testSparseVectorUnsupportedIndex() throws Exception {
public void testSparseVectorUnsupportedIndex() {
IndexVersion version = IndexVersionUtils.randomVersionBetween(
random(),
PREVIOUS_SPARSE_VECTOR_INDEX_VERSION,
@ -318,6 +506,393 @@ public class SparseVectorFieldMapperTests extends MapperTestCase {
assertThat(e.getMessage(), containsString(SparseVectorFieldMapper.ERROR_MESSAGE_8X));
}
/** A non-boolean {@code prune} value must be rejected while parsing the mapping. */
public void testPruneMustBeBoolean() {
    Exception parsingFailure = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> {
        b.field("type", "sparse_vector");
        b.startObject("index_options").field("prune", "othervalue").endObject();
    })));

    assertThat(parsingFailure.getMessage(), containsString("[index_options] failed to parse field [prune]"));

    Throwable rootCause = parsingFailure.getCause().getCause();
    assertThat(rootCause, instanceOf(IllegalArgumentException.class));
    assertThat(rootCause.getMessage(), containsString("Failed to parse value [othervalue] as only [true] or [false] are allowed."));
}
/** A {@code pruning_config} that is not an object must be rejected while parsing the mapping. */
public void testPruningConfigurationIsMap() {
    Exception parsingFailure = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> {
        b.field("type", "sparse_vector");
        b.startObject("index_options").field("prune", true).field("pruning_config", "this_is_not_a_map").endObject();
    })));

    assertThat(parsingFailure.getMessage(), containsString("[index_options] pruning_config doesn't support values of type:"));

    Throwable cause = parsingFailure.getCause();
    assertThat(cause, instanceOf(XContentParseException.class));
    assertThat(cause.getMessage(), containsString("[index_options] pruning_config doesn't support values of type: VALUE_STRING"));
}
/**
 * A {@code pruning_config} is only valid together with {@code prune: true}; both an explicit
 * {@code prune: false} and a missing {@code prune} must be rejected.
 */
public void testWithIndexOptionsPruningConfigPruneRequired() throws Exception {
    // prune explicitly false
    Exception pruneFalseFailure = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> {
        b.field("type", "sparse_vector");
        b.startObject("index_options");
        b.field("prune", false);
        b.startObject("pruning_config").field("tokens_freq_ratio_threshold", 5.0).field("tokens_weight_threshold", 0.4).endObject();
        b.endObject();
    })));
    assertThat(pruneFalseFailure.getMessage(), containsString("[index_options] failed to parse field [pruning_config]"));

    Throwable pruneFalseRootCause = pruneFalseFailure.getCause().getCause().getCause();
    assertThat(pruneFalseRootCause, instanceOf(IllegalArgumentException.class));
    assertThat(
        pruneFalseRootCause.getMessage(),
        containsString("[index_options] field [pruning_config] should only be set if [prune] is set to true")
    );

    // prune not given at all
    Exception pruneMissingFailure = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> {
        b.field("type", "sparse_vector");
        b.startObject("index_options");
        b.startObject("pruning_config").field("tokens_freq_ratio_threshold", 5.0).field("tokens_weight_threshold", 0.4).endObject();
        b.endObject();
    })));
    assertThat(
        pruneMissingFailure.getMessage(),
        containsString("Failed to parse mapping: Failed to build [index_options] after last required field arrived")
    );

    Throwable pruneMissingRootCause = pruneMissingFailure.getCause().getCause();
    assertThat(pruneMissingRootCause, instanceOf(IllegalArgumentException.class));
    assertThat(
        pruneMissingRootCause.getMessage(),
        containsString("[index_options] field [pruning_config] should only be set if [prune] is set to true")
    );
}
/**
 * Checks validation of {@code tokens_freq_ratio_threshold}: a non-numeric string, a value below the
 * allowed range and a value above it must each fail mapping creation with the expected error chain.
 */
public void testTokensFreqRatioCorrect() {
    // a string that cannot be parsed as a number
    Exception notANumberError = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> {
        b.field("type", "sparse_vector");
        b.startObject("index_options");
        {
            b.field("prune", true);
            b.startObject("pruning_config");
            {
                b.field("tokens_freq_ratio_threshold", "notaninteger");
            }
            b.endObject();
        }
        b.endObject();
    })));
    assertThat(
        notANumberError.getMessage(),
        containsString("Failed to parse mapping: [0:0] [index_options] failed to parse field [pruning_config]")
    );
    Throwable fieldParseFailure = notANumberError.getCause().getCause();
    assertThat(fieldParseFailure, instanceOf(XContentParseException.class));
    assertThat(
        fieldParseFailure.getMessage(),
        containsString("[pruning_config] failed to parse field [tokens_freq_ratio_threshold]")
    );
    Throwable numberFormatFailure = fieldParseFailure.getCause();
    assertThat(numberFormatFailure, instanceOf(NumberFormatException.class));
    assertThat(numberFormatFailure.getMessage(), containsString("For input string: \"notaninteger\""));

    // a value below the lower bound
    Exception tooLowError = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> {
        b.field("type", "sparse_vector");
        b.startObject("index_options");
        {
            b.field("prune", true);
            b.startObject("pruning_config");
            {
                b.field("tokens_freq_ratio_threshold", -2);
            }
            b.endObject();
        }
        b.endObject();
    })));
    assertThat(tooLowError.getMessage(), containsString("[index_options] failed to parse field [pruning_config]"));
    Throwable tooLowRoot = tooLowError.getCause().getCause().getCause();
    assertThat(tooLowRoot, instanceOf(IllegalArgumentException.class));
    assertThat(tooLowRoot.getMessage(), containsString("[tokens_freq_ratio_threshold] must be between [1] and [100], got -2.0"));

    // a value above the upper bound
    Exception tooHighError = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> {
        b.field("type", "sparse_vector");
        b.startObject("index_options");
        {
            b.field("prune", true);
            b.startObject("pruning_config");
            {
                b.field("tokens_freq_ratio_threshold", 101);
            }
            b.endObject();
        }
        b.endObject();
    })));
    assertThat(tooHighError.getMessage(), containsString("[index_options] failed to parse field [pruning_config]"));
    Throwable tooHighRoot = tooHighError.getCause().getCause().getCause();
    assertThat(tooHighRoot, instanceOf(IllegalArgumentException.class));
    assertThat(tooHighRoot.getMessage(), containsString("[tokens_freq_ratio_threshold] must be between [1] and [100], got 101.0"));
}
/**
 * Checks validation of {@code tokens_weight_threshold}: a non-numeric string, a negative value and
 * a value above one must each fail mapping creation with the expected error chain.
 */
public void testTokensWeightThresholdCorrect() {
    // a string that cannot be parsed as a number
    Exception notANumberError = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> {
        b.field("type", "sparse_vector");
        b.startObject("index_options");
        {
            b.field("prune", true);
            b.startObject("pruning_config");
            {
                b.field("tokens_weight_threshold", "notadouble");
            }
            b.endObject();
        }
        b.endObject();
    })));
    assertThat(notANumberError.getMessage(), containsString("[index_options] failed to parse field [pruning_config]"));
    Throwable notANumberRoot = notANumberError.getCause().getCause().getCause();
    assertThat(notANumberRoot, instanceOf(NumberFormatException.class));
    assertThat(notANumberRoot.getMessage(), containsString("For input string: \"notadouble\""));

    // a value below the lower bound
    Exception tooLowError = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> {
        b.field("type", "sparse_vector");
        b.startObject("index_options");
        {
            b.field("prune", true);
            b.startObject("pruning_config");
            {
                b.field("tokens_weight_threshold", -0.1);
            }
            b.endObject();
        }
        b.endObject();
    })));
    assertThat(tooLowError.getMessage(), containsString("[index_options] failed to parse field [pruning_config]"));
    Throwable tooLowRoot = tooLowError.getCause().getCause().getCause();
    assertThat(tooLowRoot, instanceOf(IllegalArgumentException.class));
    assertThat(tooLowRoot.getMessage(), containsString("[tokens_weight_threshold] must be between 0 and 1"));

    // a value above the upper bound
    Exception tooHighError = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> {
        b.field("type", "sparse_vector");
        b.startObject("index_options");
        {
            b.field("prune", true);
            b.startObject("pruning_config");
            {
                b.field("tokens_weight_threshold", 1.1);
            }
            b.endObject();
        }
        b.endObject();
    })));
    assertThat(tooHighError.getMessage(), containsString("[index_options] failed to parse field [pruning_config]"));
    Throwable tooHighRoot = tooHighError.getCause().getCause().getCause();
    assertThat(tooHighRoot, instanceOf(IllegalArgumentException.class));
    assertThat(tooHighRoot.getMessage(), containsString("[tokens_weight_threshold] must be between 0 and 1"));
}
/**
 * Indexes a single document (built with {@code writeField}) into a fresh directory, opens a reader
 * over it and hands a {@link SearchExecutionContext} backed by that reader to {@code consumer}.
 *
 * @param mapperService supplies the document mapper used to parse the document and is passed on
 *                      when creating the search execution context
 * @param consumer      callback invoked with the search execution context while the reader is open
 * @throws IOException if building the source, indexing, or the consumer fails with an I/O error
 */
private void withSearchExecutionContext(MapperService mapperService, CheckedConsumer<SearchExecutionContext, IOException> consumer)
    throws IOException {
    var mapper = mapperService.documentMapper();
    try (Directory directory = newDirectory()) {
        // scope the writer so it is closed even when parsing or indexing throws,
        // and always before the reader is opened
        try (RandomIndexWriter iw = new RandomIndexWriter(random(), directory)) {
            var sourceToParse = source(this::writeField);
            ParsedDocument doc1 = mapper.parse(sourceToParse);
            iw.addDocument(doc1.rootDoc());
        }
        try (DirectoryReader reader = wrapInMockESDirectoryReader(DirectoryReader.open(directory))) {
            var searchContext = createSearchExecutionContext(mapperService, newSearcher(reader));
            consumer.accept(searchContext);
        }
    }
}
/**
 * Runs the parameterized query-finalization scenario twenty times, each time with a freshly
 * randomized combination of the five boolean switches.
 */
public void testTypeQueryFinalizationWithRandomOptions() throws Exception {
    int remainingRounds = 20;
    while (remainingRounds-- > 0) {
        // drawn in the same order as the parameters of runTestTypeQueryFinalization
        boolean useIndexVersionBeforeIndexOptions = randomBoolean();
        boolean useMapperDefaultIndexOptions = randomBoolean();
        boolean setMapperIndexOptionsPruneToFalse = randomBoolean();
        boolean queryOverridesPruningConfig = randomBoolean();
        boolean queryOverridesPruneToBeFalse = randomBoolean();
        runTestTypeQueryFinalization(
            useIndexVersionBeforeIndexOptions,
            useMapperDefaultIndexOptions,
            setMapperIndexOptionsPruneToFalse,
            queryOverridesPruningConfig,
            queryOverridesPruneToBeFalse
        );
    }
}
/**
 * With the minimal mapping on the current index version and no query-level pruning settings,
 * the finalized query is expected to be pruned.
 */
public void testTypeQueryFinalizationDefaultsCurrentVersion() throws Exception {
    MapperService currentVersionService = createMapperService(IndexVersion.current(), fieldMapping(this::minimalMapping));

    // newer index versions prune by default
    final boolean expectPruned = true;
    performTypeQueryFinalizationTest(currentVersionService, null, null, expectPruned);
}
/**
 * With the minimal mapping on a random index version older than
 * {@code SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT} and no query-level pruning settings,
 * the finalized query is expected not to be pruned.
 */
public void testTypeQueryFinalizationDefaultsPreviousVersion() throws Exception {
    // newest version that precedes index_options support
    IndexVersion newestUnsupported = IndexVersionUtils.getPreviousVersion(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT);
    IndexVersion olderVersion = IndexVersionUtils.randomVersionBetween(random(), UPGRADE_TO_LUCENE_10_0_0, newestUnsupported);
    MapperService olderVersionService = createMapperService(olderVersion, fieldMapping(this::minimalMapping));

    // older index versions do _not_ prune by default
    final boolean expectPruned = false;
    performTypeQueryFinalizationTest(olderVersionService, null, null, expectPruned);
}
/**
 * With a mapping that spells out its index options on the current index version and no
 * query-level pruning settings, the finalized query is expected to be pruned.
 */
public void testTypeQueryFinalizationWithIndexExplicit() throws Exception {
    IndexVersion version = IndexVersion.current();
    // Use the mapping with explicit index_options; the minimal mapping would merely repeat
    // testTypeQueryFinalizationDefaultsCurrentVersion.
    // NOTE(review): assumes minimalMappingWithExplicitIndexOptions enables pruning - confirm against its definition.
    MapperService mapperService = createMapperService(version, fieldMapping(this::minimalMappingWithExplicitIndexOptions));

    // query should be pruned via explicit index options
    performTypeQueryFinalizationTest(mapperService, null, null, true);
}
/**
 * With the {@code mappingWithIndexOptionsPruneFalse} mapping on the current index version and no
 * query-level pruning settings, the finalized query is expected not to be pruned.
 */
public void testTypeQueryFinalizationWithIndexExplicitDoNotPrune() throws Exception {
    MapperService pruneDisabledService = createMapperService(
        IndexVersion.current(),
        fieldMapping(this::mappingWithIndexOptionsPruneFalse)
    );

    // query should _not_ be pruned, as directed by the mapping's index options
    final boolean expectPruned = false;
    performTypeQueryFinalizationTest(pruneDisabledService, null, null, expectPruned);
}
/**
 * With the {@code mappingWithIndexOptionsPruneFalse} mapping, a query that passes {@code prune = true}
 * and a default-constructed {@link TokenPruningConfig} is expected to be pruned anyway.
 */
public void testTypeQueryFinalizationQueryOverridesPruning() throws Exception {
    MapperService pruneDisabledService = createMapperService(
        IndexVersion.current(),
        fieldMapping(this::mappingWithIndexOptionsPruneFalse)
    );

    // the query-level settings win over the mapping, so pruning still happens
    Boolean queryPrune = true;
    TokenPruningConfig queryConfig = new TokenPruningConfig();
    performTypeQueryFinalizationTest(pruneDisabledService, queryPrune, queryConfig, true);
}
/**
 * With the {@code mappingWithIndexOptionsPruneFalse} mapping, a query that passes {@code prune = false}
 * and no pruning configuration is expected not to be pruned.
 */
public void testTypeQueryFinalizationQueryOverridesPruningOff() throws Exception {
    MapperService pruneDisabledService = createMapperService(
        IndexVersion.current(),
        fieldMapping(this::mappingWithIndexOptionsPruneFalse)
    );

    // the query should not be pruned because the query builder turns pruning off
    Boolean queryPrune = false;
    performTypeQueryFinalizationTest(pruneDisabledService, queryPrune, null, false);
}
/**
 * Finalizes a sparse vector query for {@code "field"} over {@code QUERY_VECTORS} and asserts
 * whether the resulting query was pruned.
 *
 * @param mapperService           mapper service whose {@code "field"} field type is under test
 * @param queryPrune              prune flag passed to the finalization call, may be {@code null}
 * @param queryTokenPruningConfig pruning configuration passed to the finalization call, may be {@code null}
 * @param queryShouldBePruned     {@code true} to assert the query was pruned, {@code false} to assert it was not
 * @throws IOException if setting up or using the search execution context fails
 */
private void performTypeQueryFinalizationTest(
    MapperService mapperService,
    @Nullable Boolean queryPrune,
    @Nullable TokenPruningConfig queryTokenPruningConfig,
    boolean queryShouldBePruned
) throws IOException {
    withSearchExecutionContext(mapperService, searchContext -> {
        var sparseFieldType = (SparseVectorFieldMapper.SparseVectorFieldType) mapperService.fieldType("field");
        Query finalized = sparseFieldType.finalizeSparseVectorQuery(
            searchContext,
            "field",
            QUERY_VECTORS,
            queryPrune,
            queryTokenPruningConfig
        );
        if (queryShouldBePruned == false) {
            assertQueryWasNotPruned(finalized);
            return;
        }
        assertQueryWasPruned(finalized);
    });
}
/**
 * Asserts that the query counts as pruned, which this test defines as having no SHOULD clauses.
 */
private void assertQueryWasPruned(Query query) {
    final int expectedClauseCount = 0;
    assertQueryHasClauseCount(query, expectedClauseCount);
}
/**
 * Asserts that the query counts as not pruned, i.e. it has one SHOULD clause per entry in
 * {@code QUERY_VECTORS}.
 */
private void assertQueryWasNotPruned(Query query) {
    final int expectedClauseCount = QUERY_VECTORS.size();
    assertQueryHasClauseCount(query, expectedClauseCount);
}
/**
 * Unwraps the terms query of a {@link SparseVectorQueryWrapper} and asserts how many SHOULD
 * clauses it holds.
 *
 * @param query       the finalized query; cast to {@link SparseVectorQueryWrapper}
 * @param clauseCount the expected number of SHOULD clauses
 */
private void assertQueryHasClauseCount(Query query, int clauseCount) {
    var wrappedTermsQuery = ((SparseVectorQueryWrapper) query).getTermsQuery();
    assertNotNull(wrappedTermsQuery);
    // the terms query is cast to a BooleanQuery to inspect its clauses
    BooleanQuery asBoolean = (BooleanQuery) wrappedTermsQuery;
    int shouldClauseCount = asBoolean.getClauses(BooleanClause.Occur.SHOULD).size();
    assertThat(shouldClauseCount, equalTo(clauseCount));
}
/**
 * Runs one query-finalization scenario for the given combination of mapper and query settings and
 * checks whether the finalized query was pruned. An assertion failure is rethrown with the
 * parameter values added to its message.
 *
 * @param useIndexVersionBeforeIndexOptions set to true to use a previous index version before mapper index_options
 * @param useMapperDefaultIndexOptions set to false to use an explicit, non-default mapper index_options
 * @param setMapperIndexOptionsPruneToFalse set to true to use prune:false in the mapper index_options
 * @param queryOverridesPruningConfig set to true to designate the query will provide a pruning_config
 * @param queryOverridesPruneToBeFalse if true and queryOverridesPruningConfig is true, the query will provide prune:false
 * @throws IOException if creating the mapper service or running the finalization check fails
 */
private void runTestTypeQueryFinalization(
    boolean useIndexVersionBeforeIndexOptions,
    boolean useMapperDefaultIndexOptions,
    boolean setMapperIndexOptionsPruneToFalse,
    boolean queryOverridesPruningConfig,
    boolean queryOverridesPruneToBeFalse
) throws IOException {
    MapperService mapperService = getMapperServiceForTest(
        useIndexVersionBeforeIndexOptions,
        useMapperDefaultIndexOptions,
        setMapperIndexOptionsPruneToFalse
    );

    // the query's own prune flag; stays null when the query does not override the index_options
    Boolean queryPrune = null;
    if (queryOverridesPruningConfig) {
        queryPrune = queryOverridesPruneToBeFalse == false;
    }

    // the query only carries a pruning configuration when it explicitly asks for pruning
    TokenPruningConfig queryPruningConfig = null;
    if (Boolean.TRUE.equals(queryPrune)) {
        queryPruningConfig = new TokenPruningConfig();
    }

    // expected outcome:
    // - when the query overrides, its prune flag alone decides
    // - otherwise pruning happens unless a previous index version is used
    //   or the index_options explicitly set `prune` to false
    final boolean expectPruned;
    if (queryOverridesPruningConfig) {
        expectPruned = queryOverridesPruneToBeFalse == false;
    } else {
        expectPruned = (useIndexVersionBeforeIndexOptions || setMapperIndexOptionsPruneToFalse) == false;
    }

    try {
        performTypeQueryFinalizationTest(mapperService, queryPrune, queryPruningConfig, expectPruned);
    } catch (AssertionError failure) {
        // attach the randomized parameters so a failing combination can be reproduced
        StringBuilder details = new StringBuilder("performTypeQueryFinalizationTest failed using parameters: ");
        details.append("useIndexVersionBeforeIndexOptions: ").append(useIndexVersionBeforeIndexOptions);
        details.append(", useMapperDefaultIndexOptions: ").append(useMapperDefaultIndexOptions);
        details.append(", setMapperIndexOptionsPruneToFalse: ").append(setMapperIndexOptionsPruneToFalse);
        details.append(", queryOverridesPruningConfig: ").append(queryOverridesPruningConfig);
        details.append(", queryOverridesPruneToBeFalse: ").append(queryOverridesPruneToBeFalse);
        throw new AssertionError(details.toString(), failure);
    }
}
/**
 * Picks a random index version for a test.
 *
 * @param usePreviousIndex {@code true} for a version between {@code UPGRADE_TO_LUCENE_10_0_0} and the
 *                         version just before {@code SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT};
 *                         {@code false} for a version between {@code SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT}
 *                         and the current version
 * @return the randomly chosen index version
 */
private IndexVersion getIndexVersionForTest(boolean usePreviousIndex) {
    if (usePreviousIndex == false) {
        return IndexVersionUtils.randomVersionBetween(random(), SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT, IndexVersion.current());
    }
    IndexVersion newestUnsupported = IndexVersionUtils.getPreviousVersion(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT);
    return IndexVersionUtils.randomVersionBetween(random(), UPGRADE_TO_LUCENE_10_0_0, newestUnsupported);
}
/**
 * Creates the mapper service for a query-finalization scenario.
 *
 * @param usePreviousIndex               use an index version from before index_options support; the minimal
 *                                       mapping is then always used
 * @param useIndexOptionsDefaults        use the minimal mapping instead of one with explicit index options
 * @param explicitIndexOptionsDoNotPrune use the mapping whose index options set prune to false; takes
 *                                       precedence over {@code useIndexOptionsDefaults}
 * @return a mapper service built for the chosen index version and mapping
 * @throws IOException if building the mapping or the mapper service fails
 */
private MapperService getMapperServiceForTest(
    boolean usePreviousIndex,
    boolean useIndexOptionsDefaults,
    boolean explicitIndexOptionsDoNotPrune
) throws IOException {
    // either a version that supports index options, or a previous version that does not
    final IndexVersion indexVersion = getIndexVersionForTest(usePreviousIndex);

    // the minimal mapping (no index_options) applies to old indices, and to new indices
    // that neither force prune:false nor ask for explicit options
    final boolean useMinimalMapping = usePreviousIndex || (explicitIndexOptionsDoNotPrune == false && useIndexOptionsDefaults);
    if (useMinimalMapping) {
        return createMapperService(indexVersion, fieldMapping(this::minimalMapping));
    }

    // the index_options explicitly include "prune: false"
    if (explicitIndexOptionsDoNotPrune) {
        return createMapperService(indexVersion, fieldMapping(this::mappingWithIndexOptionsPruneFalse));
    }

    // what remains is the mapping with an explicit pruning_config
    return createMapperService(indexVersion, fieldMapping(this::minimalMappingWithExplicitIndexOptions));
}
/** Weighted query tokens shared by the query-finalization tests; immutable and never reassigned. */
private static final List<WeightedToken> QUERY_VECTORS = List.of(
    new WeightedToken("pugs", 0.5f),
    new WeightedToken("cats", 0.4f),
    new WeightedToken("is", 0.1f)
);
/**
* Handles float/double conversion when reading/writing with xcontent by converting all numbers to floats.
*/

View file

@ -9,22 +9,35 @@
package org.elasticsearch.index.mapper.vectors;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.IndexVersions;
import org.elasticsearch.index.fielddata.FieldDataContext;
import org.elasticsearch.index.mapper.FieldTypeTestCase;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.test.index.IndexVersionUtils;
import java.util.Collections;
/**
 * Tests basic properties of {@link SparseVectorFieldMapper.SparseVectorFieldType}.
 */
public class SparseVectorFieldTypeTests extends FieldTypeTestCase {

    /** The field type reports no doc values and refuses to build fielddata. */
    public void testDocValuesDisabled() {
        MappedFieldType fieldType = newFieldTypeWithRandomVersion();
        assertFalse(fieldType.hasDocValues());
        expectThrows(IllegalArgumentException.class, () -> fieldType.fielddataBuilder(FieldDataContext.noRuntimeFields("test")));
    }

    /** The field type reports that it is not aggregatable. */
    public void testIsNotAggregatable() {
        MappedFieldType fieldType = newFieldTypeWithRandomVersion();
        assertFalse(fieldType.isAggregatable());
    }

    /**
     * Builds the field type under test for a random index version between
     * {@code IndexVersions.NEW_SPARSE_VECTOR} and the current version.
     */
    private static MappedFieldType newFieldTypeWithRandomVersion() {
        IndexVersion indexVersion = IndexVersionUtils.randomVersionBetween(
            random(),
            IndexVersions.NEW_SPARSE_VECTOR,
            IndexVersion.current()
        );
        return new SparseVectorFieldMapper.SparseVectorFieldType(indexVersion, "field", false, Collections.emptyMap());
    }
}