Extensible Completion Postings Formats (#111494)

Makes the Completion Postings Format extensible: an implementation of the CompletionsPostingsFormatExtension SPI can supply the name of the postings format to use for completion fields.
This commit is contained in:
John Verwolf 2024-11-28 13:25:02 -08:00 committed by GitHub
parent f096c317c0
commit 8350ff29ba
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 59 additions and 26 deletions

View file

@ -0,0 +1,5 @@
pr: 111494
summary: Extensible Completion Postings Formats
area: "Suggesters"
type: enhancement
issues: []

View file

@ -7,6 +7,7 @@
* License v3.0 only", or the "Server Side Public License, v 1".
*/
import org.elasticsearch.internal.CompletionsPostingsFormatExtension;
import org.elasticsearch.plugins.internal.RestExtension;
/** The Elasticsearch Server Module. */
@ -288,7 +289,8 @@ module org.elasticsearch.server {
to
org.elasticsearch.serverless.version,
org.elasticsearch.serverless.buildinfo,
org.elasticsearch.serverless.constants;
org.elasticsearch.serverless.constants,
org.elasticsearch.serverless.codec;
exports org.elasticsearch.lucene.analysis.miscellaneous;
exports org.elasticsearch.lucene.grouping;
exports org.elasticsearch.lucene.queries;
@ -395,6 +397,7 @@ module org.elasticsearch.server {
org.elasticsearch.stateless,
org.elasticsearch.settings.secure,
org.elasticsearch.serverless.constants,
org.elasticsearch.serverless.codec,
org.elasticsearch.serverless.apifiltering,
org.elasticsearch.internal.security;
@ -414,6 +417,7 @@ module org.elasticsearch.server {
uses org.elasticsearch.node.internal.TerminationHandlerProvider;
uses org.elasticsearch.internal.VersionExtension;
uses org.elasticsearch.internal.BuildExtension;
uses CompletionsPostingsFormatExtension;
uses org.elasticsearch.features.FeatureSpecification;
uses org.elasticsearch.plugins.internal.LoggingDataProvider;

View file

@ -20,10 +20,15 @@ import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.codec.bloomfilter.ES87BloomFilterPostingsFormat;
import org.elasticsearch.index.codec.postings.ES812PostingsFormat;
import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat;
import org.elasticsearch.index.mapper.CompletionFieldMapper;
import org.elasticsearch.index.mapper.IdFieldMapper;
import org.elasticsearch.index.mapper.Mapper;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;
import org.elasticsearch.internal.CompletionsPostingsFormatExtension;
import org.elasticsearch.plugins.ExtensionLoader;
import java.util.ServiceLoader;
/**
* Class that encapsulates the logic of figuring out the most appropriate file format for a given field, across postings, doc values and
@ -53,15 +58,28 @@ public class PerFieldFormatSupplier {
private PostingsFormat internalGetPostingsFormatForField(String field) {
if (mapperService != null) {
final PostingsFormat format = mapperService.mappingLookup().getPostingsFormat(field);
if (format != null) {
return format;
Mapper mapper = mapperService.mappingLookup().getMapper(field);
if (mapper instanceof CompletionFieldMapper) {
return PostingsFormatHolder.POSTINGS_FORMAT;
}
}
// return our own posting format using PFOR
return es812PostingsFormat;
}
/**
 * Holder for the postings format applied to completion fields. The format is resolved a single time, when this
 * holder class is initialized, and then shared through {@link #POSTINGS_FORMAT}.
 */
private static class PostingsFormatHolder {
    private static final PostingsFormat POSTINGS_FORMAT = getPostingsFormat();

    /**
     * Looks up the completion postings format by name. The name comes from the loaded
     * {@link CompletionsPostingsFormatExtension} when it supplies one; otherwise the built-in default name is used.
     */
    private static PostingsFormat getPostingsFormat() {
        // Caution: changing this name will result in exceptions if a field is created during a rolling upgrade and the
        // new codec (specified by the name) is not available on all nodes in the cluster.
        final String fallbackName = "Completion912";
        final ServiceLoader<CompletionsPostingsFormatExtension> extensionLoader = ServiceLoader.load(
            CompletionsPostingsFormatExtension.class
        );
        final String resolvedName = ExtensionLoader.loadSingleton(extensionLoader)
            .map(extension -> extension.getFormatName())
            .orElse(fallbackName);
        return PostingsFormat.forName(resolvedName);
    }
}
boolean useBloomFilter(String field) {
if (mapperService == null) {
return false;

View file

@ -8,7 +8,6 @@
*/
package org.elasticsearch.index.mapper;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.Term;
@ -344,10 +343,6 @@ public class CompletionFieldMapper extends FieldMapper {
return (CompletionFieldType) super.fieldType();
}
static PostingsFormat postingsFormat() {
return PostingsFormat.forName("Completion912");
}
@Override
public boolean parsesArrayValue() {
return true;

View file

@ -9,7 +9,6 @@
package org.elasticsearch.index.mapper;
import org.apache.lucene.codecs.PostingsFormat;
import org.elasticsearch.cluster.metadata.DataStream;
import org.elasticsearch.cluster.metadata.InferenceFieldMetadata;
import org.elasticsearch.index.IndexSettings;
@ -21,7 +20,6 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
@ -58,7 +56,6 @@ public final class MappingLookup {
private final Map<String, NamedAnalyzer> indexAnalyzersMap;
private final List<FieldMapper> indexTimeScriptMappers;
private final Mapping mapping;
private final Set<String> completionFields;
private final int totalFieldsCount;
/**
@ -161,7 +158,6 @@ public final class MappingLookup {
this.nestedLookup = NestedLookup.build(nestedMappers);
final Map<String, NamedAnalyzer> indexAnalyzersMap = new HashMap<>();
final Set<String> completionFields = new HashSet<>();
final List<FieldMapper> indexTimeScriptMappers = new ArrayList<>();
for (FieldMapper mapper : mappers) {
if (objects.containsKey(mapper.fullPath())) {
@ -174,9 +170,6 @@ public final class MappingLookup {
if (mapper.hasScript()) {
indexTimeScriptMappers.add(mapper);
}
if (mapper instanceof CompletionFieldMapper) {
completionFields.add(mapper.fullPath());
}
}
for (FieldAliasMapper aliasMapper : aliasMappers) {
@ -211,7 +204,6 @@ public final class MappingLookup {
this.objectMappers = Map.copyOf(objects);
this.runtimeFieldMappersCount = runtimeFields.size();
this.indexAnalyzersMap = Map.copyOf(indexAnalyzersMap);
this.completionFields = Set.copyOf(completionFields);
this.indexTimeScriptMappers = List.copyOf(indexTimeScriptMappers);
runtimeFields.stream().flatMap(RuntimeField::asMappedFieldTypes).map(MappedFieldType::name).forEach(this::validateDoesNotShadow);
@ -285,15 +277,6 @@ public final class MappingLookup {
return fieldMappers.values();
}
/**
* Gets the postings format for a particular field
* @param field the field to retrieve a postings format for
* @return the postings format for the field, or {@code null} if the default format should be used
*/
public PostingsFormat getPostingsFormat(String field) {
return completionFields.contains(field) ? CompletionFieldMapper.postingsFormat() : null;
}
void checkLimits(IndexSettings settings) {
checkFieldLimit(settings.getMappingTotalFieldsLimit());
checkObjectDepthLimit(settings.getMappingDepthLimit());

View file

@ -0,0 +1,28 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
package org.elasticsearch.internal;
import org.apache.lucene.search.suggest.document.CompletionPostingsFormat;
/**
* Allows plugging-in the Completions Postings Format.
* <p>
* An implementation supplies only the <em>name</em> of the postings format; it does not supply the format instance itself.
*/
public interface CompletionsPostingsFormatExtension {
/**
* Returns the name of the {@link CompletionPostingsFormat} that Elasticsearch should use. Should return {@code null} if the
* extension is not enabled.
* <p>
* Note that the name must match a codec that is available on all nodes in the cluster, otherwise IndexCorruptionExceptions will occur.
* A feature can be used to protect against this scenario, or alternatively, the codec code can be rolled out prior to its usage by this
* extension.
* <p>
* NOTE(review): the exact exception type raised for an unavailable format name is not visible from this file — confirm before
* relying on it in error handling.
*
* @return the name of the postings format to use, or {@code null} if the extension is not enabled
*/
String getFormatName();
}