Pull match_only_text fixes into main (#130049)

This brings in the fixes from #130020, with minor fixes to address review
nits from that PR.

Co-authored-by: Martijn van Groningen <martijn.v.groningen@gmail.com>
This commit is contained in:
Jordan Powers 2025-06-26 11:31:33 -07:00 committed by GitHub
parent 1d3bd46c6a
commit 40a7d02269
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 328 additions and 8 deletions

View file

@ -37,6 +37,7 @@
<suppress files="qa[/\\]rolling-upgrade[/\\]src[/\\]javaRestTest[/\\]java[/\\]org[/\\]elasticsearch[/\\]upgrades[/\\]TsdbIT.java" checks="LineLength" />
<suppress files="qa[/\\]rolling-upgrade[/\\]src[/\\]javaRestTest[/\\]java[/\\]org[/\\]elasticsearch[/\\]upgrades[/\\]TsdbIndexingRollingUpgradeIT.java" checks="LineLength" />
<suppress files="qa[/\\]rolling-upgrade[/\\]src[/\\]javaRestTest[/\\]java[/\\]org[/\\]elasticsearch[/\\]upgrades[/\\]LogsdbIndexingRollingUpgradeIT.java" checks="LineLength" />
<suppress files="plugin[/\\]logsdb[/\\]qa[/\\]rolling-upgrade[/\\]src[/\\]javaRestTest[/\\]java[/\\]org[/\\]elasticsearch[/\\]upgrades[/\\]MatchOnlyTextRollingUpgradeIT.java" checks="LineLength" />
<!-- Gradle requires inputs to be seriablizable -->
<suppress files="build-tools-internal[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]gradle[/\\]internal[/\\]precommit[/\\]TestingConventionRule.java" checks="RegexpSinglelineJava" />

View file

@ -362,10 +362,38 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
return toQuery(query, queryShardContext);
}
private static class BytesFromMixedStringsBytesRefBlockLoader extends BlockStoredFieldsReader.StoredFieldsBlockLoader {
BytesFromMixedStringsBytesRefBlockLoader(String field) {
super(field);
}
@Override
public Builder builder(BlockFactory factory, int expectedCount) {
return factory.bytesRefs(expectedCount);
}
@Override
public RowStrideReader rowStrideReader(LeafReaderContext context) throws IOException {
return new BlockStoredFieldsReader.Bytes(field) {
private final BytesRef scratch = new BytesRef();
@Override
protected BytesRef toBytesRef(Object v) {
if (v instanceof BytesRef b) {
return b;
} else {
assert v instanceof String;
return BlockSourceReader.toBytesRef(scratch, v.toString());
}
}
};
}
}
@Override
public BlockLoader blockLoader(BlockLoaderContext blContext) {
if (textFieldType.isSyntheticSource()) {
return new BlockStoredFieldsReader.BytesFromBytesRefsBlockLoader(storedFieldNameForSyntheticSource());
return new BytesFromMixedStringsBytesRefBlockLoader(storedFieldNameForSyntheticSource());
}
SourceValueFetcher fetcher = SourceValueFetcher.toString(blContext.sourcePaths(name()));
// MatchOnlyText never has norms, so we have to use the field names field
@ -386,7 +414,12 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
) {
@Override
protected BytesRef storedToBytesRef(Object stored) {
return (BytesRef) stored;
if (stored instanceof BytesRef storedBytes) {
return storedBytes;
} else {
assert stored instanceof String;
return new BytesRef(stored.toString());
}
}
};
}
@ -477,7 +510,12 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
() -> new StringStoredFieldFieldLoader(fieldType().storedFieldNameForSyntheticSource(), fieldType().name(), leafName()) {
@Override
protected void write(XContentBuilder b, Object value) throws IOException {
b.value(((BytesRef) value).utf8ToString());
if (value instanceof BytesRef valueBytes) {
b.value(valueBytes.utf8ToString());
} else {
assert value instanceof String;
b.value(value.toString());
}
}
}
);

View file

@ -10,6 +10,9 @@
package org.elasticsearch.index.mapper.extras;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions;
@ -21,6 +24,7 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.analysis.CannedTokenStream;
import org.apache.lucene.tests.analysis.Token;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.Strings;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.index.IndexSettings;
@ -350,4 +354,29 @@ public class MatchOnlyTextFieldMapperTests extends MapperTestCase {
assertThat(fields, empty());
}
}
public void testLoadSyntheticSourceFromStringOrBytesRef() throws IOException {
DocumentMapper mapper = createSytheticSourceMapperService(mapping(b -> {
b.startObject("field1").field("type", "match_only_text").endObject();
b.startObject("field2").field("type", "match_only_text").endObject();
})).documentMapper();
try (Directory directory = newDirectory()) {
RandomIndexWriter iw = indexWriterForSyntheticSource(directory);
LuceneDocument document = new LuceneDocument();
document.add(new StringField("field1", "foo", Field.Store.NO));
document.add(new StoredField("field1._original", "foo"));
document.add(new StringField("field2", "bar", Field.Store.NO));
document.add(new StoredField("field2._original", new BytesRef("bar")));
iw.addDocument(document);
iw.close();
try (DirectoryReader indexReader = wrapInMockESDirectoryReader(DirectoryReader.open(directory))) {
String syntheticSource = syntheticSource(mapper, null, indexReader, 0);
assertEquals("{\"field1\":\"foo\",\"field2\":\"bar\"}", syntheticSource);
}
}
}
}

View file

@ -469,7 +469,7 @@ public abstract class BlockSourceReader implements BlockLoader.RowStrideReader {
/**
* Convert a {@link String} into a utf-8 {@link BytesRef}.
*/
static BytesRef toBytesRef(BytesRef scratch, String v) {
public static BytesRef toBytesRef(BytesRef scratch, String v) {
int len = UnicodeUtil.maxUTF8Length(v.length());
if (scratch.bytes.length < len) {
scratch.bytes = new byte[len];

View file

@ -35,10 +35,10 @@ public abstract class BlockStoredFieldsReader implements BlockLoader.RowStrideRe
return true;
}
private abstract static class StoredFieldsBlockLoader implements BlockLoader {
public abstract static class StoredFieldsBlockLoader implements BlockLoader {
protected final String field;
StoredFieldsBlockLoader(String field) {
public StoredFieldsBlockLoader(String field) {
this.field = field;
}
@ -112,10 +112,10 @@ public abstract class BlockStoredFieldsReader implements BlockLoader.RowStrideRe
}
}
private abstract static class Bytes extends BlockStoredFieldsReader {
public abstract static class Bytes extends BlockStoredFieldsReader {
private final String field;
Bytes(String field) {
public Bytes(String field) {
this.field = field;
}

View file

@ -0,0 +1,252 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
package org.elasticsearch.upgrades;
import com.carrotsearch.randomizedtesting.annotations.Name;
import org.elasticsearch.client.Request;
import org.elasticsearch.client.Response;
import org.elasticsearch.client.ResponseException;
import org.elasticsearch.common.network.NetworkAddress;
import org.elasticsearch.common.time.DateFormatter;
import org.elasticsearch.common.time.FormatNames;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.test.rest.ObjectPath;
import org.elasticsearch.xcontent.XContentType;
import java.io.IOException;
import java.io.InputStream;
import java.time.Instant;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import static org.elasticsearch.upgrades.LogsIndexModeRollingUpgradeIT.enableLogsdbByDefault;
import static org.elasticsearch.upgrades.LogsIndexModeRollingUpgradeIT.getWriteBackingIndex;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
import static org.hamcrest.Matchers.notNullValue;
public class MatchOnlyTextRollingUpgradeIT extends AbstractRollingUpgradeWithSecurityTestCase {
static String BULK_ITEM_TEMPLATE =
"""
{"@timestamp": "$now", "host.name": "$host", "method": "$method", "ip": "$ip", "message": "$message", "length": $length, "factor": $factor}
""";
private static final String TEMPLATE = """
{
"mappings": {
"properties": {
"@timestamp" : {
"type": "date"
},
"method": {
"type": "keyword"
},
"message": {
"type": "match_only_text"
},
"ip": {
"type": "ip"
},
"length": {
"type": "long"
},
"factor": {
"type": "double"
}
}
}
}""";
public MatchOnlyTextRollingUpgradeIT(@Name("upgradedNodes") int upgradedNodes) {
super(upgradedNodes);
}
public void testIndexing() throws Exception {
String dataStreamName = "logs-bwc-test";
if (isOldCluster()) {
startTrial();
enableLogsdbByDefault();
createTemplate(dataStreamName, getClass().getSimpleName().toLowerCase(Locale.ROOT), TEMPLATE);
Instant startTime = Instant.now().minusSeconds(60 * 60);
bulkIndex(dataStreamName, 4, 1024, startTime);
String firstBackingIndex = getWriteBackingIndex(client(), dataStreamName, 0);
var settings = (Map<?, ?>) getIndexSettingsWithDefaults(firstBackingIndex).get(firstBackingIndex);
assertThat(((Map<?, ?>) settings.get("settings")).get("index.mode"), equalTo("logsdb"));
assertThat(((Map<?, ?>) settings.get("defaults")).get("index.mapping.source.mode"), equalTo("SYNTHETIC"));
ensureGreen(dataStreamName);
search(dataStreamName);
query(dataStreamName);
} else if (isMixedCluster()) {
Instant startTime = Instant.now().minusSeconds(60 * 30);
bulkIndex(dataStreamName, 4, 1024, startTime);
ensureGreen(dataStreamName);
search(dataStreamName);
query(dataStreamName);
} else if (isUpgradedCluster()) {
ensureGreen(dataStreamName);
Instant startTime = Instant.now();
bulkIndex(dataStreamName, 4, 1024, startTime);
search(dataStreamName);
query(dataStreamName);
var forceMergeRequest = new Request("POST", "/" + dataStreamName + "/_forcemerge");
forceMergeRequest.addParameter("max_num_segments", "1");
assertOK(client().performRequest(forceMergeRequest));
ensureGreen(dataStreamName);
search(dataStreamName);
query(dataStreamName);
}
}
static void createTemplate(String dataStreamName, String id, String template) throws IOException {
final String INDEX_TEMPLATE = """
{
"index_patterns": ["$DATASTREAM"],
"template": $TEMPLATE,
"data_stream": {
}
}""";
var putIndexTemplateRequest = new Request("POST", "/_index_template/" + id);
putIndexTemplateRequest.setJsonEntity(INDEX_TEMPLATE.replace("$TEMPLATE", template).replace("$DATASTREAM", dataStreamName));
assertOK(client().performRequest(putIndexTemplateRequest));
}
static String bulkIndex(String dataStreamName, int numRequest, int numDocs, Instant startTime) throws Exception {
String firstIndex = null;
for (int i = 0; i < numRequest; i++) {
var bulkRequest = new Request("POST", "/" + dataStreamName + "/_bulk");
StringBuilder requestBody = new StringBuilder();
for (int j = 0; j < numDocs; j++) {
String hostName = "host" + j % 50; // Not realistic, but makes asserting search / query response easier.
String methodName = "method" + j % 5;
String ip = NetworkAddress.format(randomIp(true));
String param = "chicken" + randomInt(5);
String message = "the quick brown fox jumps over the " + param;
long length = randomLong();
double factor = randomDouble();
requestBody.append("{\"create\": {}}");
requestBody.append('\n');
requestBody.append(
BULK_ITEM_TEMPLATE.replace("$now", formatInstant(startTime))
.replace("$host", hostName)
.replace("$method", methodName)
.replace("$ip", ip)
.replace("$message", message)
.replace("$length", Long.toString(length))
.replace("$factor", Double.toString(factor))
);
requestBody.append('\n');
startTime = startTime.plusMillis(1);
}
bulkRequest.setJsonEntity(requestBody.toString());
bulkRequest.addParameter("refresh", "true");
var response = client().performRequest(bulkRequest);
assertOK(response);
var responseBody = entityAsMap(response);
assertThat("errors in response:\n " + responseBody, responseBody.get("errors"), equalTo(false));
if (firstIndex == null) {
firstIndex = (String) ((Map<?, ?>) ((Map<?, ?>) ((List<?>) responseBody.get("items")).get(0)).get("create")).get("_index");
}
}
return firstIndex;
}
void search(String dataStreamName) throws Exception {
var searchRequest = new Request("POST", "/" + dataStreamName + "/_search");
searchRequest.addParameter("pretty", "true");
searchRequest.setJsonEntity("""
{
"size": 500,
"query": {
"match_phrase": {
"message": "chicken"
}
}
}
""".replace("chicken", "chicken" + randomInt(5)));
var response = client().performRequest(searchRequest);
assertOK(response);
var responseBody = entityAsMap(response);
logger.info("{}", responseBody);
Integer totalCount = ObjectPath.evaluate(responseBody, "hits.total.value");
assertThat(totalCount, greaterThanOrEqualTo(512));
}
void query(String dataStreamName) throws Exception {
var queryRequest = new Request("POST", "/_query");
queryRequest.addParameter("pretty", "true");
queryRequest.setJsonEntity("""
{
"query": "FROM $ds | STATS max(length), max(factor) BY message | SORT message | LIMIT 5"
}
""".replace("$ds", dataStreamName));
var response = client().performRequest(queryRequest);
assertOK(response);
var responseBody = entityAsMap(response);
logger.info("{}", responseBody);
String column1 = ObjectPath.evaluate(responseBody, "columns.0.name");
String column2 = ObjectPath.evaluate(responseBody, "columns.1.name");
String column3 = ObjectPath.evaluate(responseBody, "columns.2.name");
assertThat(column1, equalTo("max(length)"));
assertThat(column2, equalTo("max(factor)"));
assertThat(column3, equalTo("message"));
String key = ObjectPath.evaluate(responseBody, "values.0.2");
assertThat(key, equalTo("the quick brown fox jumps over the chicken0"));
Long maxRx = ObjectPath.evaluate(responseBody, "values.0.0");
assertThat(maxRx, notNullValue());
Double maxTx = ObjectPath.evaluate(responseBody, "values.0.1");
assertThat(maxTx, notNullValue());
}
protected static void startTrial() throws IOException {
Request startTrial = new Request("POST", "/_license/start_trial");
startTrial.addParameter("acknowledge", "true");
try {
assertOK(client().performRequest(startTrial));
} catch (ResponseException e) {
var responseBody = entityAsMap(e.getResponse());
String error = ObjectPath.evaluate(responseBody, "error_message");
assertThat(error, containsString("Trial was already activated."));
}
}
static Map<String, Object> getIndexSettingsWithDefaults(String index) throws IOException {
Request request = new Request("GET", "/" + index + "/_settings");
request.addParameter("flat_settings", "true");
request.addParameter("include_defaults", "true");
Response response = client().performRequest(request);
try (InputStream is = response.getEntity().getContent()) {
return XContentHelper.convertToMap(
XContentType.fromMediaType(response.getEntity().getContentType().getValue()).xContent(),
is,
true
);
}
}
static String formatInstant(Instant instant) {
return DateFormatter.forPattern(FormatNames.STRICT_DATE_OPTIONAL_TIME.getName()).format(instant);
}
}