mirror of
https://github.com/elastic/elasticsearch.git
synced 2025-06-28 01:22:26 -04:00
Use optimized text in match_only_text fields (#129371)
Follow-up to #126492 to use the JSON parsing optimizations for match_only_text fields. Relates to #129072.
This commit is contained in:
parent
9f78d11639
commit
5d1999781a
7 changed files with 90 additions and 13 deletions
|
@ -41,6 +41,7 @@ dependencies {
|
||||||
}
|
}
|
||||||
api(project(':libs:h3'))
|
api(project(':libs:h3'))
|
||||||
api(project(':modules:aggregations'))
|
api(project(':modules:aggregations'))
|
||||||
|
implementation project(':modules:mapper-extras');
|
||||||
api(project(':x-pack:plugin:esql-core'))
|
api(project(':x-pack:plugin:esql-core'))
|
||||||
api(project(':x-pack:plugin:core'))
|
api(project(':x-pack:plugin:core'))
|
||||||
api(project(':x-pack:plugin:esql'))
|
api(project(':x-pack:plugin:esql'))
|
||||||
|
|
|
@ -29,6 +29,7 @@ import org.elasticsearch.index.mapper.MapperService;
|
||||||
import org.elasticsearch.index.mapper.ProvidedIdFieldMapper;
|
import org.elasticsearch.index.mapper.ProvidedIdFieldMapper;
|
||||||
import org.elasticsearch.index.similarity.SimilarityService;
|
import org.elasticsearch.index.similarity.SimilarityService;
|
||||||
import org.elasticsearch.indices.IndicesModule;
|
import org.elasticsearch.indices.IndicesModule;
|
||||||
|
import org.elasticsearch.plugins.MapperPlugin;
|
||||||
import org.elasticsearch.script.Script;
|
import org.elasticsearch.script.Script;
|
||||||
import org.elasticsearch.script.ScriptCompiler;
|
import org.elasticsearch.script.ScriptCompiler;
|
||||||
import org.elasticsearch.script.ScriptContext;
|
import org.elasticsearch.script.ScriptContext;
|
||||||
|
@ -38,11 +39,16 @@ import org.elasticsearch.xcontent.XContentParserConfiguration;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.UncheckedIOException;
|
import java.io.UncheckedIOException;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
public class MapperServiceFactory {
|
public class MapperServiceFactory {
|
||||||
|
|
||||||
public static MapperService create(String mappings) {
|
public static MapperService create(String mappings) {
|
||||||
|
return create(mappings, Collections.emptyList());
|
||||||
|
}
|
||||||
|
|
||||||
|
public static MapperService create(String mappings, List<MapperPlugin> mapperPlugins) {
|
||||||
Settings settings = Settings.builder()
|
Settings settings = Settings.builder()
|
||||||
.put("index.number_of_replicas", 0)
|
.put("index.number_of_replicas", 0)
|
||||||
.put("index.number_of_shards", 1)
|
.put("index.number_of_shards", 1)
|
||||||
|
@ -51,7 +57,7 @@ public class MapperServiceFactory {
|
||||||
.build();
|
.build();
|
||||||
IndexMetadata meta = IndexMetadata.builder("index").settings(settings).build();
|
IndexMetadata meta = IndexMetadata.builder("index").settings(settings).build();
|
||||||
IndexSettings indexSettings = new IndexSettings(meta, settings);
|
IndexSettings indexSettings = new IndexSettings(meta, settings);
|
||||||
MapperRegistry mapperRegistry = new IndicesModule(Collections.emptyList()).getMapperRegistry();
|
MapperRegistry mapperRegistry = new IndicesModule(mapperPlugins).getMapperRegistry();
|
||||||
|
|
||||||
SimilarityService similarityService = new SimilarityService(indexSettings, null, Map.of());
|
SimilarityService similarityService = new SimilarityService(indexSettings, null, Map.of());
|
||||||
BitsetFilterCache bitsetFilterCache = new BitsetFilterCache(indexSettings, BitsetFilterCache.Listener.NOOP);
|
BitsetFilterCache bitsetFilterCache = new BitsetFilterCache(indexSettings, BitsetFilterCache.Listener.NOOP);
|
||||||
|
|
|
@ -15,6 +15,7 @@ import org.elasticsearch.common.bytes.BytesReference;
|
||||||
import org.elasticsearch.common.logging.LogConfigurator;
|
import org.elasticsearch.common.logging.LogConfigurator;
|
||||||
import org.elasticsearch.index.mapper.MapperService;
|
import org.elasticsearch.index.mapper.MapperService;
|
||||||
import org.elasticsearch.index.mapper.SourceToParse;
|
import org.elasticsearch.index.mapper.SourceToParse;
|
||||||
|
import org.elasticsearch.index.mapper.extras.MapperExtrasPlugin;
|
||||||
import org.elasticsearch.xcontent.XContentBuilder;
|
import org.elasticsearch.xcontent.XContentBuilder;
|
||||||
import org.elasticsearch.xcontent.XContentFactory;
|
import org.elasticsearch.xcontent.XContentFactory;
|
||||||
import org.elasticsearch.xcontent.XContentType;
|
import org.elasticsearch.xcontent.XContentType;
|
||||||
|
@ -34,6 +35,7 @@ import org.openjdk.jmh.annotations.Warmup;
|
||||||
import org.openjdk.jmh.infra.Blackhole;
|
import org.openjdk.jmh.infra.Blackhole;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.List;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
@ -66,7 +68,7 @@ public class OptimizedTextBenchmark {
|
||||||
private SourceToParse[] sources;
|
private SourceToParse[] sources;
|
||||||
|
|
||||||
private String randomValue(int length) {
|
private String randomValue(int length) {
|
||||||
final String CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
|
final String CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 ";
|
||||||
Random random = new Random();
|
Random random = new Random();
|
||||||
StringBuilder builder = new StringBuilder(length);
|
StringBuilder builder = new StringBuilder(length);
|
||||||
for (int i = 0; i < length; i++) {
|
for (int i = 0; i < length; i++) {
|
||||||
|
@ -83,17 +85,17 @@ public class OptimizedTextBenchmark {
|
||||||
"dynamic": false,
|
"dynamic": false,
|
||||||
"properties": {
|
"properties": {
|
||||||
"field": {
|
"field": {
|
||||||
"type": "keyword"
|
"type": "match_only_text"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
""");
|
""", List.of(new MapperExtrasPlugin()));
|
||||||
|
|
||||||
sources = new SourceToParse[nDocs];
|
sources = new SourceToParse[nDocs];
|
||||||
for (int i = 0; i < nDocs; i++) {
|
for (int i = 0; i < nDocs; i++) {
|
||||||
XContentBuilder b = XContentFactory.jsonBuilder();
|
XContentBuilder b = XContentFactory.jsonBuilder();
|
||||||
b.startObject().field("field", randomValue(8)).endObject();
|
b.startObject().field("field", randomValue(512)).endObject();
|
||||||
sources[i] = new SourceToParse(UUIDs.randomBase64UUID(), BytesReference.bytes(b), XContentType.JSON);
|
sources[i] = new SourceToParse(UUIDs.randomBase64UUID(), BytesReference.bytes(b), XContentType.JSON);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,6 +31,7 @@ import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.IOFunction;
|
import org.apache.lucene.util.IOFunction;
|
||||||
import org.elasticsearch.common.CheckedIntFunction;
|
import org.elasticsearch.common.CheckedIntFunction;
|
||||||
import org.elasticsearch.common.lucene.Lucene;
|
import org.elasticsearch.common.lucene.Lucene;
|
||||||
|
import org.elasticsearch.common.text.UTF8DecodingReader;
|
||||||
import org.elasticsearch.common.unit.Fuzziness;
|
import org.elasticsearch.common.unit.Fuzziness;
|
||||||
import org.elasticsearch.index.IndexVersion;
|
import org.elasticsearch.index.IndexVersion;
|
||||||
import org.elasticsearch.index.analysis.IndexAnalyzers;
|
import org.elasticsearch.index.analysis.IndexAnalyzers;
|
||||||
|
@ -364,7 +365,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
|
||||||
@Override
|
@Override
|
||||||
public BlockLoader blockLoader(BlockLoaderContext blContext) {
|
public BlockLoader blockLoader(BlockLoaderContext blContext) {
|
||||||
if (textFieldType.isSyntheticSource()) {
|
if (textFieldType.isSyntheticSource()) {
|
||||||
return new BlockStoredFieldsReader.BytesFromStringsBlockLoader(storedFieldNameForSyntheticSource());
|
return new BlockStoredFieldsReader.BytesFromBytesRefsBlockLoader(storedFieldNameForSyntheticSource());
|
||||||
}
|
}
|
||||||
SourceValueFetcher fetcher = SourceValueFetcher.toString(blContext.sourcePaths(name()));
|
SourceValueFetcher fetcher = SourceValueFetcher.toString(blContext.sourcePaths(name()));
|
||||||
// MatchOnlyText never has norms, so we have to use the field names field
|
// MatchOnlyText never has norms, so we have to use the field names field
|
||||||
|
@ -385,7 +386,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
|
||||||
) {
|
) {
|
||||||
@Override
|
@Override
|
||||||
protected BytesRef storedToBytesRef(Object stored) {
|
protected BytesRef storedToBytesRef(Object stored) {
|
||||||
return new BytesRef((String) stored);
|
return (BytesRef) stored;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -443,18 +444,20 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void parseCreateField(DocumentParserContext context) throws IOException {
|
protected void parseCreateField(DocumentParserContext context) throws IOException {
|
||||||
final String value = context.parser().textOrNull();
|
final var value = context.parser().optimizedTextOrNull();
|
||||||
|
|
||||||
if (value == null) {
|
if (value == null) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
Field field = new Field(fieldType().name(), value, fieldType);
|
final var utfBytes = value.bytes();
|
||||||
|
Field field = new Field(fieldType().name(), new UTF8DecodingReader(utfBytes), fieldType);
|
||||||
context.doc().add(field);
|
context.doc().add(field);
|
||||||
context.addToFieldNames(fieldType().name());
|
context.addToFieldNames(fieldType().name());
|
||||||
|
|
||||||
if (storeSource) {
|
if (storeSource) {
|
||||||
context.doc().add(new StoredField(fieldType().storedFieldNameForSyntheticSource(), value));
|
final var bytesRef = new BytesRef(utfBytes.bytes(), utfBytes.offset(), utfBytes.length());
|
||||||
|
context.doc().add(new StoredField(fieldType().storedFieldNameForSyntheticSource(), bytesRef));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -474,7 +477,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
|
||||||
() -> new StringStoredFieldFieldLoader(fieldType().storedFieldNameForSyntheticSource(), fieldType().name(), leafName()) {
|
() -> new StringStoredFieldFieldLoader(fieldType().storedFieldNameForSyntheticSource(), fieldType().name(), leafName()) {
|
||||||
@Override
|
@Override
|
||||||
protected void write(XContentBuilder b, Object value) throws IOException {
|
protected void write(XContentBuilder b, Object value) throws IOException {
|
||||||
b.value((String) value);
|
b.value(((BytesRef) value).utf8ToString());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
|
@ -41,6 +41,7 @@ import org.apache.lucene.search.TwoPhaseIterator;
|
||||||
import org.apache.lucene.search.Weight;
|
import org.apache.lucene.search.Weight;
|
||||||
import org.apache.lucene.search.similarities.Similarity;
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
import org.apache.lucene.search.similarities.Similarity.SimScorer;
|
import org.apache.lucene.search.similarities.Similarity.SimScorer;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.IOFunction;
|
import org.apache.lucene.util.IOFunction;
|
||||||
import org.elasticsearch.common.CheckedIntFunction;
|
import org.elasticsearch.common.CheckedIntFunction;
|
||||||
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
|
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
|
||||||
|
@ -438,7 +439,13 @@ public final class SourceConfirmedTextQuery extends Query {
|
||||||
if (value == null) {
|
if (value == null) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
cacheEntry.memoryIndex.addField(field, value.toString(), indexAnalyzer);
|
String valueStr;
|
||||||
|
if (value instanceof BytesRef valueRef) {
|
||||||
|
valueStr = valueRef.utf8ToString();
|
||||||
|
} else {
|
||||||
|
valueStr = value.toString();
|
||||||
|
}
|
||||||
|
cacheEntry.memoryIndex.addField(field, valueStr, indexAnalyzer);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return cacheEntry.memoryIndex;
|
return cacheEntry.memoryIndex;
|
||||||
|
|
|
@ -123,7 +123,12 @@ public class MatchOnlyTextFieldMapperTests extends MapperTestCase {
|
||||||
ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234")));
|
ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234")));
|
||||||
List<IndexableField> fields = doc.rootDoc().getFields("field");
|
List<IndexableField> fields = doc.rootDoc().getFields("field");
|
||||||
assertEquals(1, fields.size());
|
assertEquals(1, fields.size());
|
||||||
assertEquals("1234", fields.get(0).stringValue());
|
|
||||||
|
var reader = fields.get(0).readerValue();
|
||||||
|
char[] buff = new char[20];
|
||||||
|
assertEquals(4, reader.read(buff));
|
||||||
|
assertEquals("1234", new String(buff, 0, 4));
|
||||||
|
|
||||||
IndexableFieldType fieldType = fields.get(0).fieldType();
|
IndexableFieldType fieldType = fields.get(0).fieldType();
|
||||||
assertThat(fieldType.omitNorms(), equalTo(true));
|
assertThat(fieldType.omitNorms(), equalTo(true));
|
||||||
assertTrue(fieldType.tokenized());
|
assertTrue(fieldType.tokenized());
|
||||||
|
|
|
@ -0,0 +1,53 @@
|
||||||
|
/*
|
||||||
|
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||||
|
* or more contributor license agreements. Licensed under the "Elastic License
|
||||||
|
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
|
||||||
|
* Public License v 1"; you may not use this file except in compliance with, at
|
||||||
|
* your election, the "Elastic License 2.0", the "GNU Affero General Public
|
||||||
|
* License v3.0 only", or the "Server Side Public License, v 1".
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.common.text;
|
||||||
|
|
||||||
|
import org.elasticsearch.xcontent.XContentString;
|
||||||
|
|
||||||
|
import java.io.Reader;
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.CharBuffer;
|
||||||
|
import java.nio.charset.CharsetDecoder;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reader that decodes UTF-8 formatted bytes into chars.
|
||||||
|
*/
|
||||||
|
public final class UTF8DecodingReader extends Reader {
|
||||||
|
private CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder();
|
||||||
|
private ByteBuffer bytes;
|
||||||
|
|
||||||
|
public UTF8DecodingReader(ByteBuffer bytes) {
|
||||||
|
this.bytes = bytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
public UTF8DecodingReader(XContentString.UTF8Bytes utf8bytes) {
|
||||||
|
this.bytes = ByteBuffer.wrap(utf8bytes.bytes(), utf8bytes.offset(), utf8bytes.length());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int read(char[] cbuf, int off, int len) {
|
||||||
|
return read(CharBuffer.wrap(cbuf, off, len));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int read(CharBuffer cbuf) {
|
||||||
|
if (bytes.hasRemaining() == false) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int startPos = cbuf.position();
|
||||||
|
decoder.decode(bytes, cbuf, true);
|
||||||
|
return cbuf.position() - startPos;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close() {}
|
||||||
|
}
|
Loading…
Add table
Add a link
Reference in a new issue