mirror of
https://github.com/elastic/elasticsearch.git
synced 2025-06-28 09:28:55 -04:00
Extensible Completion Postings Formats (#111494)
Allows the Completion Postings Format to be extended by providing an implementation of the CompletionsPostingsFormatExtension SPI.
This commit is contained in:
parent
f096c317c0
commit
8350ff29ba
6 changed files with 59 additions and 26 deletions
5
docs/changelog/111494.yaml
Normal file
5
docs/changelog/111494.yaml
Normal file
|
@ -0,0 +1,5 @@
|
|||
pr: 111494
|
||||
summary: Extensible Completion Postings Formats
|
||||
area: "Suggesters"
|
||||
type: enhancement
|
||||
issues: []
|
|
@ -7,6 +7,7 @@
|
|||
* License v3.0 only", or the "Server Side Public License, v 1".
|
||||
*/
|
||||
|
||||
import org.elasticsearch.internal.CompletionsPostingsFormatExtension;
|
||||
import org.elasticsearch.plugins.internal.RestExtension;
|
||||
|
||||
/** The Elasticsearch Server Module. */
|
||||
|
@ -288,7 +289,8 @@ module org.elasticsearch.server {
|
|||
to
|
||||
org.elasticsearch.serverless.version,
|
||||
org.elasticsearch.serverless.buildinfo,
|
||||
org.elasticsearch.serverless.constants;
|
||||
org.elasticsearch.serverless.constants,
|
||||
org.elasticsearch.serverless.codec;
|
||||
exports org.elasticsearch.lucene.analysis.miscellaneous;
|
||||
exports org.elasticsearch.lucene.grouping;
|
||||
exports org.elasticsearch.lucene.queries;
|
||||
|
@ -395,6 +397,7 @@ module org.elasticsearch.server {
|
|||
org.elasticsearch.stateless,
|
||||
org.elasticsearch.settings.secure,
|
||||
org.elasticsearch.serverless.constants,
|
||||
org.elasticsearch.serverless.codec,
|
||||
org.elasticsearch.serverless.apifiltering,
|
||||
org.elasticsearch.internal.security;
|
||||
|
||||
|
@ -414,6 +417,7 @@ module org.elasticsearch.server {
|
|||
uses org.elasticsearch.node.internal.TerminationHandlerProvider;
|
||||
uses org.elasticsearch.internal.VersionExtension;
|
||||
uses org.elasticsearch.internal.BuildExtension;
|
||||
uses CompletionsPostingsFormatExtension;
|
||||
uses org.elasticsearch.features.FeatureSpecification;
|
||||
uses org.elasticsearch.plugins.internal.LoggingDataProvider;
|
||||
|
||||
|
|
|
@ -20,10 +20,15 @@ import org.elasticsearch.index.IndexSettings;
|
|||
import org.elasticsearch.index.codec.bloomfilter.ES87BloomFilterPostingsFormat;
|
||||
import org.elasticsearch.index.codec.postings.ES812PostingsFormat;
|
||||
import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat;
|
||||
import org.elasticsearch.index.mapper.CompletionFieldMapper;
|
||||
import org.elasticsearch.index.mapper.IdFieldMapper;
|
||||
import org.elasticsearch.index.mapper.Mapper;
|
||||
import org.elasticsearch.index.mapper.MapperService;
|
||||
import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;
|
||||
import org.elasticsearch.internal.CompletionsPostingsFormatExtension;
|
||||
import org.elasticsearch.plugins.ExtensionLoader;
|
||||
|
||||
import java.util.ServiceLoader;
|
||||
|
||||
/**
|
||||
* Class that encapsulates the logic of figuring out the most appropriate file format for a given field, across postings, doc values and
|
||||
|
@ -53,15 +58,28 @@ public class PerFieldFormatSupplier {
|
|||
|
||||
private PostingsFormat internalGetPostingsFormatForField(String field) {
|
||||
if (mapperService != null) {
|
||||
final PostingsFormat format = mapperService.mappingLookup().getPostingsFormat(field);
|
||||
if (format != null) {
|
||||
return format;
|
||||
Mapper mapper = mapperService.mappingLookup().getMapper(field);
|
||||
if (mapper instanceof CompletionFieldMapper) {
|
||||
return PostingsFormatHolder.POSTINGS_FORMAT;
|
||||
}
|
||||
}
|
||||
// return our own posting format using PFOR
|
||||
return es812PostingsFormat;
|
||||
}
|
||||
|
||||
/**
 * Holder for the postings format returned for completion fields. The format is resolved once, when this nested class is
 * initialized, and then reused through {@code POSTINGS_FORMAT}.
 */
private static class PostingsFormatHolder {
|
||||
private static final PostingsFormat POSTINGS_FORMAT = getPostingsFormat();
|
||||
|
||||
/**
 * Resolves the completion postings format by name. The name comes from a {@link CompletionsPostingsFormatExtension} looked up
 * through {@link ServiceLoader}; when no name is obtained, {@code "Completion912"} is used.
 * NOTE(review): presumably {@code loadSingleton} returns a {@code java.util.Optional}, so a {@code null} format name from the
 * extension would also fall back to the default - confirm against {@code ExtensionLoader}.
 *
 * @return the format that {@code PostingsFormat.forName} returns for the resolved name
 */
private static PostingsFormat getPostingsFormat() {
|
||||
String defaultName = "Completion912"; // Caution: changing this name will result in exceptions if a field is created during a
|
||||
// rolling upgrade and the new codec (specified by the name) is not available on all nodes in the cluster.
|
||||
String codecName = ExtensionLoader.loadSingleton(ServiceLoader.load(CompletionsPostingsFormatExtension.class))
|
||||
.map(CompletionsPostingsFormatExtension::getFormatName)
|
||||
.orElse(defaultName);
|
||||
|
||||
return PostingsFormat.forName(codecName);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
boolean useBloomFilter(String field) {
|
||||
if (mapperService == null) {
|
||||
return false;
|
||||
|
|
|
@ -8,7 +8,6 @@
|
|||
*/
|
||||
package org.elasticsearch.index.mapper;
|
||||
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.Term;
|
||||
|
@ -344,10 +343,6 @@ public class CompletionFieldMapper extends FieldMapper {
|
|||
return (CompletionFieldType) super.fieldType();
|
||||
}
|
||||
|
||||
static PostingsFormat postingsFormat() {
|
||||
return PostingsFormat.forName("Completion912");
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean parsesArrayValue() {
|
||||
return true;
|
||||
|
|
|
@ -9,7 +9,6 @@
|
|||
|
||||
package org.elasticsearch.index.mapper;
|
||||
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.elasticsearch.cluster.metadata.DataStream;
|
||||
import org.elasticsearch.cluster.metadata.InferenceFieldMetadata;
|
||||
import org.elasticsearch.index.IndexSettings;
|
||||
|
@ -21,7 +20,6 @@ import java.util.ArrayList;
|
|||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
@ -58,7 +56,6 @@ public final class MappingLookup {
|
|||
private final Map<String, NamedAnalyzer> indexAnalyzersMap;
|
||||
private final List<FieldMapper> indexTimeScriptMappers;
|
||||
private final Mapping mapping;
|
||||
private final Set<String> completionFields;
|
||||
private final int totalFieldsCount;
|
||||
|
||||
/**
|
||||
|
@ -161,7 +158,6 @@ public final class MappingLookup {
|
|||
this.nestedLookup = NestedLookup.build(nestedMappers);
|
||||
|
||||
final Map<String, NamedAnalyzer> indexAnalyzersMap = new HashMap<>();
|
||||
final Set<String> completionFields = new HashSet<>();
|
||||
final List<FieldMapper> indexTimeScriptMappers = new ArrayList<>();
|
||||
for (FieldMapper mapper : mappers) {
|
||||
if (objects.containsKey(mapper.fullPath())) {
|
||||
|
@ -174,9 +170,6 @@ public final class MappingLookup {
|
|||
if (mapper.hasScript()) {
|
||||
indexTimeScriptMappers.add(mapper);
|
||||
}
|
||||
if (mapper instanceof CompletionFieldMapper) {
|
||||
completionFields.add(mapper.fullPath());
|
||||
}
|
||||
}
|
||||
|
||||
for (FieldAliasMapper aliasMapper : aliasMappers) {
|
||||
|
@ -211,7 +204,6 @@ public final class MappingLookup {
|
|||
this.objectMappers = Map.copyOf(objects);
|
||||
this.runtimeFieldMappersCount = runtimeFields.size();
|
||||
this.indexAnalyzersMap = Map.copyOf(indexAnalyzersMap);
|
||||
this.completionFields = Set.copyOf(completionFields);
|
||||
this.indexTimeScriptMappers = List.copyOf(indexTimeScriptMappers);
|
||||
|
||||
runtimeFields.stream().flatMap(RuntimeField::asMappedFieldTypes).map(MappedFieldType::name).forEach(this::validateDoesNotShadow);
|
||||
|
@ -285,15 +277,6 @@ public final class MappingLookup {
|
|||
return fieldMappers.values();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the postings format for a particular field
|
||||
* @param field the field to retrieve a postings format for
|
||||
* @return the postings format for the field, or {@code null} if the default format should be used
|
||||
*/
|
||||
public PostingsFormat getPostingsFormat(String field) {
|
||||
return completionFields.contains(field) ? CompletionFieldMapper.postingsFormat() : null;
|
||||
}
|
||||
|
||||
void checkLimits(IndexSettings settings) {
|
||||
checkFieldLimit(settings.getMappingTotalFieldsLimit());
|
||||
checkObjectDepthLimit(settings.getMappingDepthLimit());
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the "Elastic License
|
||||
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
|
||||
* Public License v 1"; you may not use this file except in compliance with, at
|
||||
* your election, the "Elastic License 2.0", the "GNU Affero General Public
|
||||
* License v3.0 only", or the "Server Side Public License, v 1".
|
||||
*/
|
||||
|
||||
package org.elasticsearch.internal;
|
||||
|
||||
import org.apache.lucene.search.suggest.document.CompletionPostingsFormat;
|
||||
|
||||
/**
|
||||
 * Allows plugging-in the Completions Postings Format.
 * <p>
 * Implementations are looked up through {@link java.util.ServiceLoader}; the server module declares a {@code uses} clause for this
 * interface.
|
||||
 */
|
||||
public interface CompletionsPostingsFormatExtension {
|
||||
|
||||
/**
|
||||
* Returns the name of the {@link CompletionPostingsFormat} that Elasticsearch should use. Should return null if the extension
|
||||
* is not enabled.
|
||||
* <p>
|
||||
* Note that the name must match a codec that is available on all nodes in the cluster, otherwise IndexCorruptionExceptions will occur.
|
||||
* A feature can be used to protect against this scenario, or alternatively, the codec code can be rolled out prior to its usage by this
|
||||
* extension.
 *
 * @return the name of the postings format to use for completion fields, or {@code null} if this extension is not enabled
|
||||
*/
|
||||
String getFormatName();
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue