Mirror of https://github.com/elastic/elasticsearch.git, synced 2025-04-19 04:45:07 -04:00
Default new semantic_text fields to use BBQ when models are compatible (#126629)
* Default new semantic_text fields to use BBQ when models are compatible
* Update docs/changelog/126629.yaml
* Gate default BBQ by IndexVersion
* Cleanup from PR feedback
* PR feedback
* Fix test
* Fix test
* PR feedback
* Update test to test correct options
* Hack alert: Fix issue where mapper service was always being created with current index version
This commit is contained in:
parent 0d41e9a2a5
commit a72883e8e3

7 changed files with 218 additions and 37 deletions
docs/changelog/126629.yaml (new file, +5 lines)

@@ -0,0 +1,5 @@
+pr: 126629
+summary: Default new `semantic_text` fields to use BBQ when models are compatible
+area: Relevance
+type: enhancement
+issues: []
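In practice, the change swaps the effective default index options for compatible dense embedding models. A minimal sketch assembled from constants in this diff (Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN is 16 and DEFAULT_BEAM_WIDTH is 100; the side-by-side construction is illustrative, not an actual call site):

    // Old effective default: dense_vector's own int8 HNSW options.
    var before = new DenseVectorFieldMapper.Int8HnswIndexOptions(
        Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN,       // m = 16
        Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH,     // ef_construction = 100
        null,                                             // confidence_interval
        null                                              // no rescore_vector
    );
    // New semantic_text default when the model is BBQ-compatible:
    var after = new DenseVectorFieldMapper.BBQHnswIndexOptions(
        Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN,
        Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH,
        new DenseVectorFieldMapper.RescoreVector(3.0f)    // DEFAULT_RESCORE_OVERSAMPLE
    );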

IndexVersions.java

@@ -160,6 +160,7 @@ public class IndexVersions {
     public static final IndexVersion SYNTHETIC_SOURCE_STORE_ARRAYS_NATIVELY_SCALED_FLOAT = def(9_020_0_00, Version.LUCENE_10_1_0);
     public static final IndexVersion USE_LUCENE101_POSTINGS_FORMAT = def(9_021_0_00, Version.LUCENE_10_1_0);
     public static final IndexVersion UPGRADE_TO_LUCENE_10_2_0 = def(9_022_0_00, Version.LUCENE_10_2_0);
+    public static final IndexVersion SEMANTIC_TEXT_DEFAULTS_TO_BBQ = def(9_023_0_00, Version.LUCENE_10_2_0);
     /*
      * STOP! READ THIS FIRST! No, really,
      * ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _
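This constant is the gate consulted by SemanticTextFieldMapper further down in this commit; a one-line sketch of the check as the mapper uses it:

    // Only indices created on or after this version get the BBQ default.
    boolean eligible = indexVersionCreated.onOrAfter(IndexVersions.SEMANTIC_TEXT_DEFAULTS_TO_BBQ);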

DenseVectorFieldMapper.java

@@ -290,6 +290,11 @@ public class DenseVectorFieldMapper extends FieldMapper {
             return this;
         }

+        public Builder indexOptions(IndexOptions indexOptions) {
+            this.indexOptions.setValue(indexOptions);
+            return this;
+        }
+
         @Override
         public DenseVectorFieldMapper build(MapperBuilderContext context) {
             // Validate again here because the dimensions or element type could have been set programmatically,
@@ -1221,7 +1226,7 @@ public class DenseVectorFieldMapper extends FieldMapper {
         public abstract VectorSimilarityFunction vectorSimilarityFunction(IndexVersion indexVersion, ElementType elementType);
     }

-    abstract static class IndexOptions implements ToXContent {
+    public abstract static class IndexOptions implements ToXContent {
         final VectorIndexType type;

         IndexOptions(VectorIndexType type) {
@@ -1230,21 +1235,36 @@ public class DenseVectorFieldMapper extends FieldMapper {

         abstract KnnVectorsFormat getVectorsFormat(ElementType elementType);

-        final void validateElementType(ElementType elementType) {
-            if (type.supportsElementType(elementType) == false) {
+        public boolean validate(ElementType elementType, int dim, boolean throwOnError) {
+            return validateElementType(elementType, throwOnError) && validateDimension(dim, throwOnError);
+        }
+
+        public boolean validateElementType(ElementType elementType) {
+            return validateElementType(elementType, true);
+        }
+
+        final boolean validateElementType(ElementType elementType, boolean throwOnError) {
+            boolean validElementType = type.supportsElementType(elementType);
+            if (throwOnError && validElementType == false) {
                 throw new IllegalArgumentException(
                     "[element_type] cannot be [" + elementType.toString() + "] when using index type [" + type + "]"
                 );
             }
+            return validElementType;
         }

         abstract boolean updatableTo(IndexOptions update);

-        public void validateDimension(int dim) {
-            if (type.supportsDimension(dim)) {
-                return;
+        public boolean validateDimension(int dim) {
+            return validateDimension(dim, true);
+        }
+
+        public boolean validateDimension(int dim, boolean throwOnError) {
+            boolean supportsDimension = type.supportsDimension(dim);
+            if (throwOnError && supportsDimension == false) {
+                throw new IllegalArgumentException(type.name + " only supports even dimensions; provided=" + dim);
             }
-            throw new IllegalArgumentException(type.name + " only supports even dimensions; provided=" + dim);
+            return supportsDimension;
         }

         abstract boolean doEquals(IndexOptions other);
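The net effect of this refactor is that callers can probe compatibility instead of catching IllegalArgumentException. A short sketch (the m/ef values are illustrative; per the BBQ override later in this file, dimensions below BBQ_MIN_DIMS fail validation):

    DenseVectorFieldMapper.IndexOptions options =
        new DenseVectorFieldMapper.BBQHnswIndexOptions(16, 100, null);    // no rescore_vector

    // Non-throwing probes: return false instead of throwing.
    boolean ok = options.validate(DenseVectorFieldMapper.ElementType.FLOAT, 100, false);
    boolean tooSmall = options.validateDimension(10, false);              // false, no exception

    // The single-argument forms keep the old throwing behavior.
    options.validateDimension(10);                                        // throws IllegalArgumentException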
@@ -1747,12 +1767,12 @@ public class DenseVectorFieldMapper extends FieldMapper {

     }

-    static class Int8HnswIndexOptions extends QuantizedIndexOptions {
+    public static class Int8HnswIndexOptions extends QuantizedIndexOptions {
         private final int m;
         private final int efConstruction;
         private final Float confidenceInterval;

-        Int8HnswIndexOptions(int m, int efConstruction, Float confidenceInterval, RescoreVector rescoreVector) {
+        public Int8HnswIndexOptions(int m, int efConstruction, Float confidenceInterval, RescoreVector rescoreVector) {
             super(VectorIndexType.INT8_HNSW, rescoreVector);
             this.m = m;
             this.efConstruction = efConstruction;
@@ -1890,11 +1910,11 @@ public class DenseVectorFieldMapper extends FieldMapper {
         }
     }

-    static class BBQHnswIndexOptions extends QuantizedIndexOptions {
+    public static class BBQHnswIndexOptions extends QuantizedIndexOptions {
         private final int m;
         private final int efConstruction;

-        BBQHnswIndexOptions(int m, int efConstruction, RescoreVector rescoreVector) {
+        public BBQHnswIndexOptions(int m, int efConstruction, RescoreVector rescoreVector) {
             super(VectorIndexType.BBQ_HNSW, rescoreVector);
             this.m = m;
             this.efConstruction = efConstruction;
@@ -1936,11 +1956,14 @@ public class DenseVectorFieldMapper extends FieldMapper {
         }

         @Override
-        public void validateDimension(int dim) {
-            if (type.supportsDimension(dim)) {
-                return;
+        public boolean validateDimension(int dim, boolean throwOnError) {
+            boolean supportsDimension = type.supportsDimension(dim);
+            if (throwOnError && supportsDimension == false) {
+                throw new IllegalArgumentException(
+                    type.name + " does not support dimensions fewer than " + BBQ_MIN_DIMS + "; provided=" + dim
+                );
             }
-            throw new IllegalArgumentException(type.name + " does not support dimensions fewer than " + BBQ_MIN_DIMS + "; provided=" + dim);
+            return supportsDimension;
         }
     }

@@ -1984,15 +2007,19 @@ public class DenseVectorFieldMapper extends FieldMapper {
         }

         @Override
-        public void validateDimension(int dim) {
-            if (type.supportsDimension(dim)) {
-                return;
+        public boolean validateDimension(int dim, boolean throwOnError) {
+            boolean supportsDimension = type.supportsDimension(dim);
+            if (throwOnError && supportsDimension == false) {
+                throw new IllegalArgumentException(
+                    type.name + " does not support dimensions fewer than " + BBQ_MIN_DIMS + "; provided=" + dim
+                );
             }
-            throw new IllegalArgumentException(type.name + " does not support dimensions fewer than " + BBQ_MIN_DIMS + "; provided=" + dim);
+            return supportsDimension;
         }

     }

-    record RescoreVector(float oversample) implements ToXContentObject {
+    public record RescoreVector(float oversample) implements ToXContentObject {
         static final String NAME = "rescore_vector";
         static final String OVERSAMPLE = "oversample";

@@ -2311,6 +2338,10 @@ public class DenseVectorFieldMapper extends FieldMapper {
         ElementType getElementType() {
             return elementType;
         }
+
+        public IndexOptions getIndexOptions() {
+            return indexOptions;
+        }
     }

     private final IndexOptions indexOptions;

MapperServiceTestCase.java

@@ -207,6 +207,13 @@ public abstract class MapperServiceTestCase extends FieldTypeTestCase {
         return mapperService;
     }

+    protected final MapperService createMapperService(IndexVersion indexVersion, Settings settings, XContentBuilder mappings)
+        throws IOException {
+        MapperService mapperService = createMapperService(indexVersion, settings, () -> true, mappings);
+        merge(mapperService, mappings);
+        return mapperService;
+    }
+
     protected final MapperService createMapperService(IndexVersion version, XContentBuilder mapping) throws IOException {
         return createMapperService(version, getIndexSettings(), () -> true, mapping);
     }
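A hypothetical caller of the new overload, pinning the created-version of the index instead of inheriting IndexVersion.current() (the mapping content here is a placeholder):

    MapperService mapperService = createMapperService(
        IndexVersionUtils.getPreviousVersion(IndexVersions.SEMANTIC_TEXT_DEFAULTS_TO_BBQ),
        getIndexSettings(),
        mapping(b -> {})
    );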

SemanticTextFieldMapper.java

@@ -9,6 +9,7 @@ package org.elasticsearch.xpack.inference.mapper;

 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
+import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.search.DocIdSetIterator;
@@ -95,6 +96,7 @@ import java.util.function.BiConsumer;
 import java.util.function.Function;
 import java.util.function.Supplier;

+import static org.elasticsearch.index.IndexVersions.SEMANTIC_TEXT_DEFAULTS_TO_BBQ;
 import static org.elasticsearch.inference.TaskType.SPARSE_EMBEDDING;
 import static org.elasticsearch.inference.TaskType.TEXT_EMBEDDING;
 import static org.elasticsearch.search.SearchService.DEFAULT_SIZE;
@@ -133,6 +135,8 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFieldMapper {
     public static final String CONTENT_TYPE = "semantic_text";
     public static final String DEFAULT_ELSER_2_INFERENCE_ID = DEFAULT_ELSER_ID;

+    public static final float DEFAULT_RESCORE_OVERSAMPLE = 3.0f;
+
     public static final TypeParser parser(Supplier<ModelRegistry> modelRegistry) {
         return new TypeParser(
             (n, c) -> new Builder(n, c::bitSetProducer, c.getIndexSettings(), modelRegistry.get()),
@@ -1054,12 +1058,30 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFieldMapper {
                 denseVectorMapperBuilder.dimensions(modelSettings.dimensions());
                 denseVectorMapperBuilder.elementType(modelSettings.elementType());

+                DenseVectorFieldMapper.IndexOptions defaultIndexOptions = null;
+                if (indexVersionCreated.onOrAfter(SEMANTIC_TEXT_DEFAULTS_TO_BBQ)) {
+                    defaultIndexOptions = defaultSemanticDenseIndexOptions();
+                }
+                if (defaultIndexOptions != null
+                    && defaultIndexOptions.validate(modelSettings.elementType(), modelSettings.dimensions(), false)) {
+                    denseVectorMapperBuilder.indexOptions(defaultIndexOptions);
+                }
+
                 yield denseVectorMapperBuilder;
             }
             default -> throw new IllegalArgumentException("Invalid task_type in model_settings [" + modelSettings.taskType().name() + "]");
         };
     }

+    static DenseVectorFieldMapper.IndexOptions defaultSemanticDenseIndexOptions() {
+        // As embedding models for text perform better with BBQ, we aggressively default semantic_text fields to use optimized index
+        // options outside of dense_vector defaults
+        int m = Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN;
+        int efConstruction = Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH;
+        DenseVectorFieldMapper.RescoreVector rescoreVector = new DenseVectorFieldMapper.RescoreVector(DEFAULT_RESCORE_OVERSAMPLE);
+        return new DenseVectorFieldMapper.BBQHnswIndexOptions(m, efConstruction, rescoreVector);
+    }
+
     private static boolean canMergeModelSettings(MinimalServiceSettings previous, MinimalServiceSettings current, Conflicts conflicts) {
         if (previous != null && current != null && previous.canMergeWith(current)) {
             return true;
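Putting the pieces together, the defaulting gate produces the outcomes exercised by the tests at the bottom of this commit. A sketch of the decision (the assertion form is illustrative, not test code; BBQ_MIN_DIMS is the minimum dimension count BBQ accepts):

    DenseVectorFieldMapper.IndexOptions defaults = defaultSemanticDenseIndexOptions();

    // float embeddings with enough dimensions on a new-enough index: BBQ applies.
    assert defaults.validate(DenseVectorFieldMapper.ElementType.FLOAT, 100, false);
    // byte element type is incompatible with BBQ: fall back to dense_vector defaults.
    assert defaults.validate(DenseVectorFieldMapper.ElementType.BYTE, 100, false) == false;
    // 10 dimensions is below BBQ_MIN_DIMS: fall back as well.
    assert defaults.validate(DenseVectorFieldMapper.ElementType.FLOAT, 10, false) == false;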

SemanticInferenceMetadataFieldsMapperTests.java
@@ -37,7 +37,10 @@ public class SemanticInferenceMetadataFieldsMapperTests extends MapperServiceTestCase {
         assertFalse(InferenceMetadataFieldsMapper.isEnabled(settings));

         settings = Settings.builder()
-            .put(IndexMetadata.SETTING_INDEX_VERSION_CREATED.getKey(), getRandomCompatibleIndexVersion(true))
+            .put(
+                IndexMetadata.SETTING_INDEX_VERSION_CREATED.getKey(),
+                getRandomCompatibleIndexVersion(true, IndexVersionUtils.getPreviousVersion(IndexVersions.INFERENCE_METADATA_FIELDS))
+            )
             .put(InferenceMetadataFieldsMapper.USE_LEGACY_SEMANTIC_TEXT_FORMAT.getKey(), false)
             .build();
         assertFalse(InferenceMetadataFieldsMapper.isEnabled(settings));
@@ -114,18 +117,18 @@ public class SemanticInferenceMetadataFieldsMapperTests extends MapperServiceTestCase {
     }

     static IndexVersion getRandomCompatibleIndexVersion(boolean useLegacyFormat) {
+        return getRandomCompatibleIndexVersion(useLegacyFormat, IndexVersion.current());
+    }
+
+    static IndexVersion getRandomCompatibleIndexVersion(boolean useLegacyFormat, IndexVersion maxVersion) {
         if (useLegacyFormat) {
             if (randomBoolean()) {
-                return IndexVersionUtils.randomVersionBetween(
-                    random(),
-                    IndexVersions.UPGRADE_TO_LUCENE_10_0_0,
-                    IndexVersionUtils.getPreviousVersion(IndexVersions.INFERENCE_METADATA_FIELDS)
-                );
+                return IndexVersionUtils.randomVersionBetween(random(), IndexVersions.UPGRADE_TO_LUCENE_10_0_0, maxVersion);
             }
             return IndexVersionUtils.randomPreviousCompatibleVersion(random(), IndexVersions.INFERENCE_METADATA_FIELDS_BACKPORT);
         } else {
             if (randomBoolean()) {
-                return IndexVersionUtils.randomVersionBetween(random(), IndexVersions.INFERENCE_METADATA_FIELDS, IndexVersion.current());
+                return IndexVersionUtils.randomVersionBetween(random(), IndexVersions.INFERENCE_METADATA_FIELDS, maxVersion);
             }
             return IndexVersionUtils.randomVersionBetween(
                 random(),
@@ -134,4 +137,5 @@ public class SemanticInferenceMetadataFieldsMapperTests extends MapperServiceTestCase {
             );
         }
     }
+
 }

SemanticTextFieldMapperTests.java

@@ -9,6 +9,7 @@ package org.elasticsearch.xpack.inference.mapper;

 import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;

+import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.IndexableField;
@@ -35,6 +36,7 @@ import org.elasticsearch.common.lucene.search.Queries;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.core.CheckedConsumer;
 import org.elasticsearch.index.IndexVersion;
+import org.elasticsearch.index.IndexVersions;
 import org.elasticsearch.index.mapper.DocumentMapper;
 import org.elasticsearch.index.mapper.DocumentParsingException;
 import org.elasticsearch.index.mapper.FieldMapper;
@@ -66,6 +68,7 @@ import org.elasticsearch.search.NestedDocuments;
 import org.elasticsearch.search.SearchHit;
 import org.elasticsearch.test.ClusterServiceUtils;
 import org.elasticsearch.test.client.NoOpClient;
+import org.elasticsearch.test.index.IndexVersionUtils;
 import org.elasticsearch.threadpool.TestThreadPool;
 import org.elasticsearch.xcontent.XContentBuilder;
 import org.elasticsearch.xcontent.XContentType;
@@ -149,14 +152,44 @@ public class SemanticTextFieldMapperTests extends MapperTestCase {
     }

     private MapperService createMapperService(XContentBuilder mappings, boolean useLegacyFormat) throws IOException {
+        IndexVersion indexVersion = SemanticInferenceMetadataFieldsMapperTests.getRandomCompatibleIndexVersion(useLegacyFormat);
+        return createMapperService(mappings, useLegacyFormat, indexVersion, indexVersion, false);
+    }
+
+    private MapperService createMapperService(XContentBuilder mappings, boolean useLegacyFormat, IndexVersion minIndexVersion)
+        throws IOException {
+        return createMapperService(mappings, useLegacyFormat, minIndexVersion, IndexVersion.current(), false);
+    }
+
+    private MapperService createMapperService(
+        XContentBuilder mappings,
+        boolean useLegacyFormat,
+        IndexVersion minIndexVersion,
+        IndexVersion maxIndexVersion,
+        boolean propagateIndexVersion
+    ) throws IOException {
+        validateIndexVersion(minIndexVersion, useLegacyFormat);
+        IndexVersion indexVersion = IndexVersionUtils.randomVersionBetween(random(), minIndexVersion, maxIndexVersion);
         var settings = Settings.builder()
-            .put(
-                IndexMetadata.SETTING_INDEX_VERSION_CREATED.getKey(),
-                SemanticInferenceMetadataFieldsMapperTests.getRandomCompatibleIndexVersion(useLegacyFormat)
-            )
+            .put(IndexMetadata.SETTING_INDEX_VERSION_CREATED.getKey(), indexVersion)
             .put(InferenceMetadataFieldsMapper.USE_LEGACY_SEMANTIC_TEXT_FORMAT.getKey(), useLegacyFormat)
             .build();
-        return createMapperService(settings, mappings);
+        // TODO - This is added, because we discovered a bug where the index version was not being correctly propagated
+        // in our mappings even though we were specifying the index version in settings. We will fix this in a followup and
+        // remove the boolean flag accordingly.
+        if (propagateIndexVersion) {
+            return createMapperService(indexVersion, settings, mappings);
+        } else {
+            return createMapperService(settings, mappings);
+        }
     }
+
+    private static void validateIndexVersion(IndexVersion indexVersion, boolean useLegacyFormat) {
+        if (useLegacyFormat == false
+            && indexVersion.before(IndexVersions.INFERENCE_METADATA_FIELDS)
+            && indexVersion.between(IndexVersions.INFERENCE_METADATA_FIELDS_BACKPORT, IndexVersions.UPGRADE_TO_LUCENE_10_0_0) == false) {
+            throw new IllegalArgumentException("Index version " + indexVersion + " does not support new semantic text format");
+        }
+    }

     @Override
@@ -602,14 +635,15 @@ public class SemanticTextFieldMapperTests extends MapperTestCase {
     }

     private static void assertSemanticTextField(MapperService mapperService, String fieldName, boolean expectedModelSettings) {
-        assertSemanticTextField(mapperService, fieldName, expectedModelSettings, null);
+        assertSemanticTextField(mapperService, fieldName, expectedModelSettings, null, null);
     }

     private static void assertSemanticTextField(
         MapperService mapperService,
         String fieldName,
         boolean expectedModelSettings,
-        ChunkingSettings expectedChunkingSettings
+        ChunkingSettings expectedChunkingSettings,
+        DenseVectorFieldMapper.IndexOptions expectedIndexOptions
     ) {
         Mapper mapper = mapperService.mappingLookup().getMapper(fieldName);
         assertNotNull(mapper);
@@ -655,8 +689,17 @@ public class SemanticTextFieldMapperTests extends MapperTestCase {
                     assertThat(embeddingsMapper, instanceOf(SparseVectorFieldMapper.class));
                     SparseVectorFieldMapper sparseMapper = (SparseVectorFieldMapper) embeddingsMapper;
                     assertEquals(sparseMapper.fieldType().isStored(), semanticTextFieldType.useLegacyFormat() == false);
+                    assertNull(expectedIndexOptions);
                 }
-                case TEXT_EMBEDDING -> assertThat(embeddingsMapper, instanceOf(DenseVectorFieldMapper.class));
+                case TEXT_EMBEDDING -> {
+                    assertThat(embeddingsMapper, instanceOf(DenseVectorFieldMapper.class));
+                    DenseVectorFieldMapper denseVectorFieldMapper = (DenseVectorFieldMapper) embeddingsMapper;
+                    if (expectedIndexOptions != null) {
+                        assertEquals(expectedIndexOptions, denseVectorFieldMapper.fieldType().getIndexOptions());
+                    } else {
+                        assertNull(denseVectorFieldMapper.fieldType().getIndexOptions());
+                    }
+                }
                 default -> throw new AssertionError("Invalid task type");
             }
         } else {
@@ -951,11 +994,11 @@ public class SemanticTextFieldMapperTests extends MapperTestCase {
             mapping(b -> addSemanticTextMapping(b, fieldName, model.getInferenceEntityId(), null, chunkingSettings)),
             useLegacyFormat
         );
-        assertSemanticTextField(mapperService, fieldName, false, chunkingSettings);
+        assertSemanticTextField(mapperService, fieldName, false, chunkingSettings, null);

         ChunkingSettings newChunkingSettings = generateRandomChunkingSettingsOtherThan(chunkingSettings);
         merge(mapperService, mapping(b -> addSemanticTextMapping(b, fieldName, model.getInferenceEntityId(), null, newChunkingSettings)));
-        assertSemanticTextField(mapperService, fieldName, false, newChunkingSettings);
+        assertSemanticTextField(mapperService, fieldName, false, newChunkingSettings, null);
     }

     public void testModelSettingsRequiredWithChunks() throws IOException {
@@ -1085,6 +1128,74 @@ public class SemanticTextFieldMapperTests extends MapperTestCase {
         assertThat(existsQuery, instanceOf(ESToParentBlockJoinQuery.class));
     }

+    private static DenseVectorFieldMapper.IndexOptions defaultDenseVectorIndexOptions() {
+        // These are the default index options for dense_vector fields, and used for semantic_text fields incompatible with BBQ.
+        int m = Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN;
+        int efConstruction = Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH;
+        return new DenseVectorFieldMapper.Int8HnswIndexOptions(m, efConstruction, null, null);
+    }
+
+    public void testDefaultIndexOptions() throws IOException {
+
+        // We default to BBQ for eligible dense vectors
+        var mapperService = createMapperService(fieldMapping(b -> {
+            b.field("type", "semantic_text");
+            b.field("inference_id", "another_inference_id");
+            b.startObject("model_settings");
+            b.field("task_type", "text_embedding");
+            b.field("dimensions", 100);
+            b.field("similarity", "cosine");
+            b.field("element_type", "float");
+            b.endObject();
+        }), useLegacyFormat, IndexVersions.SEMANTIC_TEXT_DEFAULTS_TO_BBQ);
+        assertSemanticTextField(mapperService, "field", true, null, SemanticTextFieldMapper.defaultSemanticDenseIndexOptions());
+
+        // Element types that are incompatible with BBQ will continue to use dense_vector defaults
+        mapperService = createMapperService(fieldMapping(b -> {
+            b.field("type", "semantic_text");
+            b.field("inference_id", "another_inference_id");
+            b.startObject("model_settings");
+            b.field("task_type", "text_embedding");
+            b.field("dimensions", 100);
+            b.field("similarity", "cosine");
+            b.field("element_type", "byte");
+            b.endObject();
+        }), useLegacyFormat, IndexVersions.SEMANTIC_TEXT_DEFAULTS_TO_BBQ);
+        assertSemanticTextField(mapperService, "field", true, null, null);
+
+        // A dim count of 10 is too small to support BBQ, so we continue to use dense_vector defaults
+        mapperService = createMapperService(fieldMapping(b -> {
+            b.field("type", "semantic_text");
+            b.field("inference_id", "another_inference_id");
+            b.startObject("model_settings");
+            b.field("task_type", "text_embedding");
+            b.field("dimensions", 10);
+            b.field("similarity", "cosine");
+            b.field("element_type", "float");
+            b.endObject();
+        }), useLegacyFormat, IndexVersions.SEMANTIC_TEXT_DEFAULTS_TO_BBQ);
+        assertSemanticTextField(mapperService, "field", true, null, defaultDenseVectorIndexOptions());
+
+        // Previous index versions do not set BBQ index options
+        mapperService = createMapperService(
+            fieldMapping(b -> {
+                b.field("type", "semantic_text");
+                b.field("inference_id", "another_inference_id");
+                b.startObject("model_settings");
+                b.field("task_type", "text_embedding");
+                b.field("dimensions", 100);
+                b.field("similarity", "cosine");
+                b.field("element_type", "float");
+                b.endObject();
+            }),
+            useLegacyFormat,
+            IndexVersions.INFERENCE_METADATA_FIELDS,
+            IndexVersionUtils.getPreviousVersion(IndexVersions.SEMANTIC_TEXT_DEFAULTS_TO_BBQ),
+            true
+        );
+        assertSemanticTextField(mapperService, "field", true, null, defaultDenseVectorIndexOptions());
+
+    }
+
     @Override
     protected void assertExistsQuery(MappedFieldType fieldType, Query query, LuceneDocument fields) {
         // Until a doc is indexed, the query is rewritten as match no docs