Mirror of https://github.com/elastic/elasticsearch.git (synced 2025-06-27 09:00:04 -04:00)
Pull match_only_text fixes into main (#130049)

This brings in the fixes from #130020, with minor fixes to address review nits from that PR.

Co-authored-by: Martijn van Groningen <martijn.v.groningen@gmail.com>

Parent: 1d3bd46c6a
Commit: 40a7d02269

6 changed files with 328 additions and 8 deletions
@@ -37,6 +37,7 @@
   <suppress files="qa[/\\]rolling-upgrade[/\\]src[/\\]javaRestTest[/\\]java[/\\]org[/\\]elasticsearch[/\\]upgrades[/\\]TsdbIT.java" checks="LineLength" />
   <suppress files="qa[/\\]rolling-upgrade[/\\]src[/\\]javaRestTest[/\\]java[/\\]org[/\\]elasticsearch[/\\]upgrades[/\\]TsdbIndexingRollingUpgradeIT.java" checks="LineLength" />
   <suppress files="qa[/\\]rolling-upgrade[/\\]src[/\\]javaRestTest[/\\]java[/\\]org[/\\]elasticsearch[/\\]upgrades[/\\]LogsdbIndexingRollingUpgradeIT.java" checks="LineLength" />
+  <suppress files="plugin[/\\]logsdb[/\\]qa[/\\]rolling-upgrade[/\\]src[/\\]javaRestTest[/\\]java[/\\]org[/\\]elasticsearch[/\\]upgrades[/\\]MatchOnlyTextRollingUpgradeIT.java" checks="LineLength" />

   <!-- Gradle requires inputs to be seriablizable -->
   <suppress files="build-tools-internal[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]gradle[/\\]internal[/\\]precommit[/\\]TestingConventionRule.java" checks="RegexpSinglelineJava" />
@@ -362,10 +362,38 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
             return toQuery(query, queryShardContext);
         }

+        private static class BytesFromMixedStringsBytesRefBlockLoader extends BlockStoredFieldsReader.StoredFieldsBlockLoader {
+            BytesFromMixedStringsBytesRefBlockLoader(String field) {
+                super(field);
+            }
+
+            @Override
+            public Builder builder(BlockFactory factory, int expectedCount) {
+                return factory.bytesRefs(expectedCount);
+            }
+
+            @Override
+            public RowStrideReader rowStrideReader(LeafReaderContext context) throws IOException {
+                return new BlockStoredFieldsReader.Bytes(field) {
+                    private final BytesRef scratch = new BytesRef();
+
+                    @Override
+                    protected BytesRef toBytesRef(Object v) {
+                        if (v instanceof BytesRef b) {
+                            return b;
+                        } else {
+                            assert v instanceof String;
+                            return BlockSourceReader.toBytesRef(scratch, v.toString());
+                        }
+                    }
+                };
+            }
+        }
+
         @Override
         public BlockLoader blockLoader(BlockLoaderContext blContext) {
             if (textFieldType.isSyntheticSource()) {
-                return new BlockStoredFieldsReader.BytesFromBytesRefsBlockLoader(storedFieldNameForSyntheticSource());
+                return new BytesFromMixedStringsBytesRefBlockLoader(storedFieldNameForSyntheticSource());
             }
             SourceValueFetcher fetcher = SourceValueFetcher.toString(blContext.sourcePaths(name()));
             // MatchOnlyText never has norms, so we have to use the field names field

@@ -386,7 +414,12 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
         ) {
             @Override
             protected BytesRef storedToBytesRef(Object stored) {
-                return (BytesRef) stored;
+                if (stored instanceof BytesRef storedBytes) {
+                    return storedBytes;
+                } else {
+                    assert stored instanceof String;
+                    return new BytesRef(stored.toString());
+                }
             }
         };
     }

@@ -477,7 +510,12 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
             () -> new StringStoredFieldFieldLoader(fieldType().storedFieldNameForSyntheticSource(), fieldType().name(), leafName()) {
                 @Override
                 protected void write(XContentBuilder b, Object value) throws IOException {
-                    b.value(((BytesRef) value).utf8ToString());
+                    if (value instanceof BytesRef valueBytes) {
+                        b.value(valueBytes.utf8ToString());
+                    } else {
+                        assert value instanceof String;
+                        b.value(value.toString());
+                    }
                 }
             }
         );
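All three hunks above apply the same defensive pattern: a stored value read back from Lucene may surface either as a BytesRef (binary stored field) or as a String (string stored field), so it is normalized with an instanceof check instead of being cast unconditionally. Below is a minimal standalone sketch of that pattern; the class name, the normalize helper, and the main driver are illustrative only and are not part of this PR.

import org.apache.lucene.util.BytesRef;

// Illustrative sketch: mirrors the instanceof-based normalization used in the hunks above.
public class MixedStoredValueSketch {

    // Accepts the two shapes a stored value can take and always returns a BytesRef.
    static BytesRef normalize(Object stored) {
        if (stored instanceof BytesRef bytes) {
            return bytes;                        // binary stored field: already a BytesRef
        } else {
            assert stored instanceof String;     // string stored field: re-encode as UTF-8
            return new BytesRef(stored.toString());
        }
    }

    public static void main(String[] args) {
        System.out.println(normalize(new BytesRef("foo")).utf8ToString()); // foo
        System.out.println(normalize("bar").utf8ToString());               // bar
    }
}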
@@ -10,6 +10,9 @@
 package org.elasticsearch.index.mapper.extras;

 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.IndexOptions;

@@ -21,6 +24,7 @@ import org.apache.lucene.store.Directory;
 import org.apache.lucene.tests.analysis.CannedTokenStream;
 import org.apache.lucene.tests.analysis.Token;
 import org.apache.lucene.tests.index.RandomIndexWriter;
+import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.core.Tuple;
 import org.elasticsearch.index.IndexSettings;

@@ -350,4 +354,29 @@ public class MatchOnlyTextFieldMapperTests extends MapperTestCase {
             assertThat(fields, empty());
         }
     }
+
+    public void testLoadSyntheticSourceFromStringOrBytesRef() throws IOException {
+        DocumentMapper mapper = createSytheticSourceMapperService(mapping(b -> {
+            b.startObject("field1").field("type", "match_only_text").endObject();
+            b.startObject("field2").field("type", "match_only_text").endObject();
+        })).documentMapper();
+        try (Directory directory = newDirectory()) {
+            RandomIndexWriter iw = indexWriterForSyntheticSource(directory);
+
+            LuceneDocument document = new LuceneDocument();
+            document.add(new StringField("field1", "foo", Field.Store.NO));
+            document.add(new StoredField("field1._original", "foo"));
+
+            document.add(new StringField("field2", "bar", Field.Store.NO));
+            document.add(new StoredField("field2._original", new BytesRef("bar")));
+
+            iw.addDocument(document);
+            iw.close();
+
+            try (DirectoryReader indexReader = wrapInMockESDirectoryReader(DirectoryReader.open(directory))) {
+                String syntheticSource = syntheticSource(mapper, null, indexReader, 0);
+                assertEquals("{\"field1\":\"foo\",\"field2\":\"bar\"}", syntheticSource);
+            }
+        }
+    }
 }
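The test stores field1._original as a string stored field and field2._original as a binary stored field because Lucene hands the two back as different Java types when the document is read, which is exactly the mix the new block loader has to tolerate. The following standalone sketch illustrates that round trip; it is not part of the PR, uses a plain IndexWriter and ByteBuffersDirectory instead of the test infrastructure above, and assumes a recent Lucene with the StoredFields reader API.

import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.util.BytesRef;

public class StoredFieldTypesSketch {
    public static void main(String[] args) throws Exception {
        try (var dir = new ByteBuffersDirectory(); var writer = new IndexWriter(dir, new IndexWriterConfig())) {
            Document doc = new Document();
            doc.add(new StoredField("field1._original", "foo"));               // stored as a string
            doc.add(new StoredField("field2._original", new BytesRef("bar"))); // stored as binary
            writer.addDocument(doc);
            writer.commit();

            try (DirectoryReader reader = DirectoryReader.open(dir)) {
                Document stored = reader.storedFields().document(0);
                // String-valued stored fields come back via stringValue(), binary ones via binaryValue().
                System.out.println(stored.getField("field1._original").stringValue()); // foo
                System.out.println(stored.getField("field2._original").binaryValue()); // [62 61 72]
            }
        }
    }
}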
@@ -469,7 +469,7 @@ public abstract class BlockSourceReader implements BlockLoader.RowStrideReader {
     /**
      * Convert a {@link String} into a utf-8 {@link BytesRef}.
      */
-    static BytesRef toBytesRef(BytesRef scratch, String v) {
+    public static BytesRef toBytesRef(BytesRef scratch, String v) {
         int len = UnicodeUtil.maxUTF8Length(v.length());
         if (scratch.bytes.length < len) {
            scratch.bytes = new byte[len];
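toBytesRef is widened to public so the new block loader in MatchOnlyTextFieldMapper can reuse it. The hunk only shows the start of the method body; below is a small sketch of the general scratch-buffer idea it relies on (grow one reusable byte[] to the worst-case UTF-8 size, then encode into it), not a copy of the actual implementation, and the class and method names are illustrative.

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.UnicodeUtil;

// Sketch of the scratch-buffer conversion idea: reuse one BytesRef across calls
// so converting many String values does not allocate a new byte[] per value.
public class ScratchUtf8Sketch {

    static BytesRef toUtf8(BytesRef scratch, String value) {
        int maxLen = UnicodeUtil.maxUTF8Length(value.length());  // worst-case UTF-8 size
        if (scratch.bytes.length < maxLen) {
            scratch.bytes = new byte[maxLen];                     // grow only when needed
        }
        scratch.offset = 0;
        scratch.length = UnicodeUtil.UTF16toUTF8(value, 0, value.length(), scratch.bytes);
        return scratch;
    }

    public static void main(String[] args) {
        BytesRef scratch = new BytesRef();
        System.out.println(toUtf8(scratch, "hello").utf8ToString());  // hello
        System.out.println(toUtf8(scratch, "wörld").utf8ToString());  // wörld, same backing buffer reused
    }
}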
@@ -35,10 +35,10 @@ public abstract class BlockStoredFieldsReader implements BlockLoader.RowStrideReader {
         return true;
     }

-    private abstract static class StoredFieldsBlockLoader implements BlockLoader {
+    public abstract static class StoredFieldsBlockLoader implements BlockLoader {
         protected final String field;

-        StoredFieldsBlockLoader(String field) {
+        public StoredFieldsBlockLoader(String field) {
             this.field = field;
         }

@@ -112,10 +112,10 @@ public abstract class BlockStoredFieldsReader implements BlockLoader.RowStrideReader {
         }
     }

-    private abstract static class Bytes extends BlockStoredFieldsReader {
+    public abstract static class Bytes extends BlockStoredFieldsReader {
         private final String field;

-        Bytes(String field) {
+        public Bytes(String field) {
             this.field = field;
         }
@@ -0,0 +1,252 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the "Elastic License
 * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
 * Public License v 1"; you may not use this file except in compliance with, at
 * your election, the "Elastic License 2.0", the "GNU Affero General Public
 * License v3.0 only", or the "Server Side Public License, v 1".
 */

package org.elasticsearch.upgrades;

import com.carrotsearch.randomizedtesting.annotations.Name;

import org.elasticsearch.client.Request;
import org.elasticsearch.client.Response;
import org.elasticsearch.client.ResponseException;
import org.elasticsearch.common.network.NetworkAddress;
import org.elasticsearch.common.time.DateFormatter;
import org.elasticsearch.common.time.FormatNames;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.test.rest.ObjectPath;
import org.elasticsearch.xcontent.XContentType;

import java.io.IOException;
import java.io.InputStream;
import java.time.Instant;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import static org.elasticsearch.upgrades.LogsIndexModeRollingUpgradeIT.enableLogsdbByDefault;
import static org.elasticsearch.upgrades.LogsIndexModeRollingUpgradeIT.getWriteBackingIndex;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
import static org.hamcrest.Matchers.notNullValue;

public class MatchOnlyTextRollingUpgradeIT extends AbstractRollingUpgradeWithSecurityTestCase {

    static String BULK_ITEM_TEMPLATE =
        """
            {"@timestamp": "$now", "host.name": "$host", "method": "$method", "ip": "$ip", "message": "$message", "length": $length, "factor": $factor}
            """;

    private static final String TEMPLATE = """
        {
            "mappings": {
                "properties": {
                    "@timestamp" : {
                        "type": "date"
                    },
                    "method": {
                        "type": "keyword"
                    },
                    "message": {
                        "type": "match_only_text"
                    },
                    "ip": {
                        "type": "ip"
                    },
                    "length": {
                        "type": "long"
                    },
                    "factor": {
                        "type": "double"
                    }
                }
            }
        }""";

    public MatchOnlyTextRollingUpgradeIT(@Name("upgradedNodes") int upgradedNodes) {
        super(upgradedNodes);
    }

    public void testIndexing() throws Exception {
        String dataStreamName = "logs-bwc-test";
        if (isOldCluster()) {
            startTrial();
            enableLogsdbByDefault();
            createTemplate(dataStreamName, getClass().getSimpleName().toLowerCase(Locale.ROOT), TEMPLATE);

            Instant startTime = Instant.now().minusSeconds(60 * 60);
            bulkIndex(dataStreamName, 4, 1024, startTime);

            String firstBackingIndex = getWriteBackingIndex(client(), dataStreamName, 0);
            var settings = (Map<?, ?>) getIndexSettingsWithDefaults(firstBackingIndex).get(firstBackingIndex);
            assertThat(((Map<?, ?>) settings.get("settings")).get("index.mode"), equalTo("logsdb"));
            assertThat(((Map<?, ?>) settings.get("defaults")).get("index.mapping.source.mode"), equalTo("SYNTHETIC"));

            ensureGreen(dataStreamName);
            search(dataStreamName);
            query(dataStreamName);
        } else if (isMixedCluster()) {
            Instant startTime = Instant.now().minusSeconds(60 * 30);
            bulkIndex(dataStreamName, 4, 1024, startTime);

            ensureGreen(dataStreamName);
            search(dataStreamName);
            query(dataStreamName);
        } else if (isUpgradedCluster()) {
            ensureGreen(dataStreamName);
            Instant startTime = Instant.now();
            bulkIndex(dataStreamName, 4, 1024, startTime);
            search(dataStreamName);
            query(dataStreamName);

            var forceMergeRequest = new Request("POST", "/" + dataStreamName + "/_forcemerge");
            forceMergeRequest.addParameter("max_num_segments", "1");
            assertOK(client().performRequest(forceMergeRequest));

            ensureGreen(dataStreamName);
            search(dataStreamName);
            query(dataStreamName);
        }
    }

    static void createTemplate(String dataStreamName, String id, String template) throws IOException {
        final String INDEX_TEMPLATE = """
            {
                "index_patterns": ["$DATASTREAM"],
                "template": $TEMPLATE,
                "data_stream": {
                }
            }""";
        var putIndexTemplateRequest = new Request("POST", "/_index_template/" + id);
        putIndexTemplateRequest.setJsonEntity(INDEX_TEMPLATE.replace("$TEMPLATE", template).replace("$DATASTREAM", dataStreamName));
        assertOK(client().performRequest(putIndexTemplateRequest));
    }

    static String bulkIndex(String dataStreamName, int numRequest, int numDocs, Instant startTime) throws Exception {
        String firstIndex = null;
        for (int i = 0; i < numRequest; i++) {
            var bulkRequest = new Request("POST", "/" + dataStreamName + "/_bulk");
            StringBuilder requestBody = new StringBuilder();
            for (int j = 0; j < numDocs; j++) {
                String hostName = "host" + j % 50; // Not realistic, but makes asserting search / query response easier.
                String methodName = "method" + j % 5;
                String ip = NetworkAddress.format(randomIp(true));
                String param = "chicken" + randomInt(5);
                String message = "the quick brown fox jumps over the " + param;
                long length = randomLong();
                double factor = randomDouble();

                requestBody.append("{\"create\": {}}");
                requestBody.append('\n');
                requestBody.append(
                    BULK_ITEM_TEMPLATE.replace("$now", formatInstant(startTime))
                        .replace("$host", hostName)
                        .replace("$method", methodName)
                        .replace("$ip", ip)
                        .replace("$message", message)
                        .replace("$length", Long.toString(length))
                        .replace("$factor", Double.toString(factor))
                );
                requestBody.append('\n');

                startTime = startTime.plusMillis(1);
            }
            bulkRequest.setJsonEntity(requestBody.toString());
            bulkRequest.addParameter("refresh", "true");
            var response = client().performRequest(bulkRequest);
            assertOK(response);
            var responseBody = entityAsMap(response);
            assertThat("errors in response:\n " + responseBody, responseBody.get("errors"), equalTo(false));
            if (firstIndex == null) {
                firstIndex = (String) ((Map<?, ?>) ((Map<?, ?>) ((List<?>) responseBody.get("items")).get(0)).get("create")).get("_index");
            }
        }
        return firstIndex;
    }

    void search(String dataStreamName) throws Exception {
        var searchRequest = new Request("POST", "/" + dataStreamName + "/_search");
        searchRequest.addParameter("pretty", "true");
        searchRequest.setJsonEntity("""
            {
                "size": 500,
                "query": {
                    "match_phrase": {
                        "message": "chicken"
                    }
                }
            }
            """.replace("chicken", "chicken" + randomInt(5)));
        var response = client().performRequest(searchRequest);
        assertOK(response);
        var responseBody = entityAsMap(response);
        logger.info("{}", responseBody);

        Integer totalCount = ObjectPath.evaluate(responseBody, "hits.total.value");
        assertThat(totalCount, greaterThanOrEqualTo(512));
    }

    void query(String dataStreamName) throws Exception {
        var queryRequest = new Request("POST", "/_query");
        queryRequest.addParameter("pretty", "true");
        queryRequest.setJsonEntity("""
            {
                "query": "FROM $ds | STATS max(length), max(factor) BY message | SORT message | LIMIT 5"
            }
            """.replace("$ds", dataStreamName));
        var response = client().performRequest(queryRequest);
        assertOK(response);
        var responseBody = entityAsMap(response);
        logger.info("{}", responseBody);

        String column1 = ObjectPath.evaluate(responseBody, "columns.0.name");
        String column2 = ObjectPath.evaluate(responseBody, "columns.1.name");
        String column3 = ObjectPath.evaluate(responseBody, "columns.2.name");
        assertThat(column1, equalTo("max(length)"));
        assertThat(column2, equalTo("max(factor)"));
        assertThat(column3, equalTo("message"));

        String key = ObjectPath.evaluate(responseBody, "values.0.2");
        assertThat(key, equalTo("the quick brown fox jumps over the chicken0"));
        Long maxRx = ObjectPath.evaluate(responseBody, "values.0.0");
        assertThat(maxRx, notNullValue());
        Double maxTx = ObjectPath.evaluate(responseBody, "values.0.1");
        assertThat(maxTx, notNullValue());
    }

    protected static void startTrial() throws IOException {
        Request startTrial = new Request("POST", "/_license/start_trial");
        startTrial.addParameter("acknowledge", "true");
        try {
            assertOK(client().performRequest(startTrial));
        } catch (ResponseException e) {
            var responseBody = entityAsMap(e.getResponse());
            String error = ObjectPath.evaluate(responseBody, "error_message");
            assertThat(error, containsString("Trial was already activated."));
        }
    }

    static Map<String, Object> getIndexSettingsWithDefaults(String index) throws IOException {
        Request request = new Request("GET", "/" + index + "/_settings");
        request.addParameter("flat_settings", "true");
        request.addParameter("include_defaults", "true");
        Response response = client().performRequest(request);
        try (InputStream is = response.getEntity().getContent()) {
            return XContentHelper.convertToMap(
                XContentType.fromMediaType(response.getEntity().getContentType().getValue()).xContent(),
                is,
                true
            );
        }
    }

    static String formatInstant(Instant instant) {
        return DateFormatter.forPattern(FormatNames.STRICT_DATE_OPTIONAL_TIME.getName()).format(instant);
    }

}