Fix duplicate strings in SearchHit serialization (#127180)

The map key is always the field name. We exploited this fact in the get results but not in
search hits, leading to a lot of duplicate strings in many heap dumps.
We could do much better here, since the names generally come from a known, limited set,
but as a first step let's at least align the get and search responses and save a non-trivial
amount of bytes in a number of use cases. Plus, having a single string instance is faster on
lookup etc. and saves CPU as well.
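In short: instead of writing each field map as key plus value (where the value's own name repeats the key), only the values are written, and the reader rebuilds the map using DocumentField#getName as the key, so the map key and the field name end up as the same String instance. A minimal self-contained sketch of the idea, with a hypothetical Field record standing in for DocumentField and plain lists standing in for the stream I/O (names KeyDedupSketch, readOld, readNew are illustrative only):

    import java.util.LinkedHashMap;
    import java.util.List;
    import java.util.Map;

    // Hypothetical stand-in for DocumentField: a named list of values.
    record Field(String name, List<Object> values) {}

    class KeyDedupSketch {
        // Old style: key and field name arrive as two separate reads, leaving
        // two equal but distinct String instances per field on the heap.
        static Map<String, Field> readOld(List<Field> wire) {
            Map<String, Field> map = new LinkedHashMap<>();
            for (Field f : wire) {
                map.put(new String(f.name()), f); // simulates a second readString()
            }
            return map;
        }

        // New style: only the values travel; the key is taken from the value
        // itself, so map key == field.name() (one shared instance).
        static Map<String, Field> readNew(List<Field> wire) {
            Map<String, Field> map = new LinkedHashMap<>();
            for (Field f : wire) {
                map.put(f.name(), f);
            }
            return map;
        }

        public static void main(String[] args) {
            List<Field> wire = List.of(new Field("title", List.<Object>of("foo")));
            System.out.println(readOld(wire).keySet().iterator().next() == wire.get(0).name()); // false
            System.out.println(readNew(wire).keySet().iterator().next() == wire.get(0).name()); // true
        }
    }

Beyond the heap savings, the write side also shrinks by one string per field, and identity-equal keys make map lookups slightly cheaper (String caches its hash and equals short-circuits on ==), which matches the CPU note above.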
Armin Braun 2025-04-22 22:43:27 +02:00 committed by GitHub
parent 03fab288e4
commit cd609533bf
17 changed files with 65 additions and 51 deletions


@@ -100,7 +100,7 @@ final class PercolatorMatchedSlotSubFetchPhase implements FetchSubPhase {
 IntStream slots = convertTopDocsToSlots(topDocs, pc.rootDocsBySlot);
 // _percolator_document_slot fields are document fields and should be under "fields" section in a hit
 List<Object> docSlots = slots.boxed().collect(Collectors.toList());
-hitContext.hit().setDocumentField(fieldName, new DocumentField(fieldName, docSlots));
+hitContext.hit().setDocumentField(new DocumentField(fieldName, docSlots));
 // Add info what sub-queries of percolator query matched this each percolated document
 if (fetchContext.getSearchExecutionContext().hasNamedQueries()) {
@@ -120,7 +120,7 @@ final class PercolatorMatchedSlotSubFetchPhase implements FetchSubPhase {
 matchedQueries.add(match.getName());
 }
 String matchedFieldName = fieldName + "_" + docSlots.get(i) + "_matched_queries";
-hitContext.hit().setDocumentField(matchedFieldName, new DocumentField(matchedFieldName, matchedQueries));
+hitContext.hit().setDocumentField(new DocumentField(matchedFieldName, matchedQueries));
 }
 }
 }


@@ -1327,8 +1327,7 @@ public class TopHitsIT extends ESIntegTestCase {
 public void process(FetchSubPhase.HitContext hitContext) {
 leafSearchLookup.setDocument(hitContext.docId());
 FieldLookup fieldLookup = leafSearchLookup.fields().get("text");
-hitContext.hit()
-.setDocumentField("text_stored_lookup", new DocumentField("text_stored_lookup", fieldLookup.getValues()));
+hitContext.hit().setDocumentField(new DocumentField("text_stored_lookup", fieldLookup.getValues()));
 }
 @Override


@@ -137,7 +137,7 @@ public class FetchSubPhasePluginIT extends ESIntegTestCase {
 DocumentField hitField = hitContext.hit().getFields().get(NAME);
 if (hitField == null) {
 hitField = new DocumentField(NAME, new ArrayList<>(1));
-hitContext.hit().setDocumentField(NAME, hitField);
+hitContext.hit().setDocumentField(hitField);
 }
 Terms terms = hitContext.reader().termVectors().get(hitContext.docId(), field);
 if (terms != null) {


@@ -233,6 +233,7 @@ public class TransportVersions {
 public static final TransportVersion SEARCH_INCREMENTAL_TOP_DOCS_NULL = def(9_058_0_00);
 public static final TransportVersion COMPRESS_DELAYABLE_WRITEABLE = def(9_059_0_00);
 public static final TransportVersion SYNONYMS_REFRESH_PARAM = def(9_060_0_00);
+public static final TransportVersion DOC_FIELDS_AS_LIST = def(9_061_0_00);
 /*
 * STOP! READ THIS FIRST! No, really,


@@ -24,6 +24,7 @@ import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Map;
 import java.util.Objects;
 import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken;
@@ -70,6 +71,15 @@ public class DocumentField implements Writeable, Iterable<Object> {
 : "DocumentField can't have both lookup fields and values";
 }
+/**
+ * Read map of document fields written via {@link StreamOutput#writeMapValues(Map)}.
+ * @param in stream input
+ * @return map of {@link DocumentField} keyed by {@link DocumentField#getName()}
+ */
+public static Map<String, DocumentField> readFieldsFromMapValues(StreamInput in) throws IOException {
+return in.readMapValues(DocumentField::new, DocumentField::getName);
+}
 /**
 * The name of the field.
 */


@@ -74,8 +74,8 @@ public class GetResult implements Writeable, Iterable<DocumentField>, ToXContent
 if (source.length() == 0) {
 source = null;
 }
-documentFields = in.readMapValues(DocumentField::new, DocumentField::getName);
-metaFields = in.readMapValues(DocumentField::new, DocumentField::getName);
+documentFields = DocumentField.readFieldsFromMapValues(in);
+metaFields = DocumentField.readFieldsFromMapValues(in);
 } else {
 metaFields = Collections.emptyMap();
 documentFields = Collections.emptyMap();


@@ -216,8 +216,15 @@ public final class SearchHit implements Writeable, ToXContentObject, RefCounted
 if (in.readBoolean()) {
 explanation = readExplanation(in);
 }
-final Map<String, DocumentField> documentFields = in.readMap(DocumentField::new);
-final Map<String, DocumentField> metaFields = in.readMap(DocumentField::new);
+final Map<String, DocumentField> documentFields;
+final Map<String, DocumentField> metaFields;
+if (in.getTransportVersion().onOrAfter(TransportVersions.DOC_FIELDS_AS_LIST)) {
+documentFields = DocumentField.readFieldsFromMapValues(in);
+metaFields = DocumentField.readFieldsFromMapValues(in);
+} else {
+documentFields = in.readMap(DocumentField::new);
+metaFields = in.readMap(DocumentField::new);
+}
 Map<String, HighlightField> highlightFields = in.readMapValues(HighlightField::new, HighlightField::name);
 highlightFields = highlightFields.isEmpty() ? null : unmodifiableMap(highlightFields);
@@ -322,8 +329,13 @@ public final class SearchHit implements Writeable, ToXContentObject, RefCounted
 out.writeBoolean(true);
 writeExplanation(out, explanation);
 }
-out.writeMap(documentFields, StreamOutput::writeWriteable);
-out.writeMap(metaFields, StreamOutput::writeWriteable);
+if (out.getTransportVersion().onOrAfter(TransportVersions.DOC_FIELDS_AS_LIST)) {
+out.writeMapValues(documentFields);
+out.writeMapValues(metaFields);
+} else {
+out.writeMap(documentFields, StreamOutput::writeWriteable);
+out.writeMap(metaFields, StreamOutput::writeWriteable);
+}
 if (highlightFields == null) {
 out.writeVInt(0);
 } else {
@@ -502,9 +514,9 @@ public final class SearchHit implements Writeable, ToXContentObject, RefCounted
 /*
 * Adds a new DocumentField to the map in case both parameters are not null.
 * */
-public void setDocumentField(String fieldName, DocumentField field) {
-if (fieldName == null || field == null) return;
-this.documentFields.put(fieldName, field);
+public void setDocumentField(DocumentField field) {
+if (field == null) return;
+this.documentFields.put(field.getName(), field);
 }
 public void addDocumentFields(Map<String, DocumentField> docFields, Map<String, DocumentField> metaFields) {
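Because this changes the wire format, the read and write paths above gate on the same transport version, and a connection negotiated below DOC_FIELDS_AS_LIST keeps the legacy key-plus-value encoding. A condensed sketch of that symmetric pattern, with the two hunks above collapsed into one illustrative pair (the signatures are simplified; the real code sits in SearchHit's read and write paths shown in this diff):

    // Sketch: both sides must branch on the same version constant; if the
    // reader and writer ever disagreed, the stream would be misparsed.
    void writeFields(StreamOutput out, Map<String, DocumentField> fields) throws IOException {
        if (out.getTransportVersion().onOrAfter(TransportVersions.DOC_FIELDS_AS_LIST)) {
            out.writeMapValues(fields); // values only; names recovered via getName() on read
        } else {
            out.writeMap(fields, StreamOutput::writeWriteable); // legacy: key + full value
        }
    }

    Map<String, DocumentField> readFields(StreamInput in) throws IOException {
        return in.getTransportVersion().onOrAfter(TransportVersions.DOC_FIELDS_AS_LIST)
            ? DocumentField.readFieldsFromMapValues(in)
            : in.readMap(DocumentField::new);
    }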


@@ -76,7 +76,7 @@ public final class FetchDocValuesPhase implements FetchSubPhase {
 hitField = new DocumentField(f.field, new ArrayList<>(2));
 // even if we request a doc values of a meta-field (e.g. _routing),
 // docValues fields will still be document fields, and put under "fields" section of a hit.
-hit.hit().setDocumentField(f.field, hitField);
+hit.hit().setDocumentField(hitField);
 }
 List<Object> ignoredValues = new ArrayList<>();
 hitField.getValues().addAll(f.fetcher.fetchValues(hit.source(), hit.docId(), ignoredValues));


@@ -75,7 +75,7 @@ public final class ScriptFieldsPhase implements FetchSubPhase {
 }
 hitField = new DocumentField(scriptFieldName, values);
 // script fields are never meta-fields
-hitContext.hit().setDocumentField(scriptFieldName, hitField);
+hitContext.hit().setDocumentField(hitField);
 }
 }
 }


@@ -95,8 +95,7 @@ public class StoredFieldsPhase implements FetchSubPhase {
 Map<String, List<Object>> loadedFields = hitContext.loadedFields();
 for (StoredField storedField : storedFields) {
 if (storedField.hasValue(loadedFields)) {
-hitContext.hit()
-.setDocumentField(storedField.name, new DocumentField(storedField.name, storedField.process(loadedFields)));
+hitContext.hit().setDocumentField(new DocumentField(storedField.name, storedField.process(loadedFields)));
 }
 }
 }


@@ -116,7 +116,7 @@ public class ExpandSearchPhaseTests extends ESTestCase {
 };
 SearchHit hit = new SearchHit(1, "ID");
-hit.setDocumentField("someField", new DocumentField("someField", Collections.singletonList(collapseValue)));
+hit.setDocumentField(new DocumentField("someField", Collections.singletonList(collapseValue)));
 ExpandSearchPhase phase = newExpandSearchPhase(
 mockSearchPhaseContext,
 new SearchResponseSections(
@@ -188,9 +188,9 @@ public class ExpandSearchPhaseTests extends ESTestCase {
 };
 SearchHit hit1 = new SearchHit(1, "ID");
-hit1.setDocumentField("someField", new DocumentField("someField", Collections.singletonList(collapseValue)));
+hit1.setDocumentField(new DocumentField("someField", Collections.singletonList(collapseValue)));
 SearchHit hit2 = new SearchHit(2, "ID2");
-hit2.setDocumentField("someField", new DocumentField("someField", Collections.singletonList(collapseValue)));
+hit2.setDocumentField(new DocumentField("someField", Collections.singletonList(collapseValue)));
 try (
 SearchResponseSections searchResponseSections = new SearchResponseSections(
 new SearchHits(new SearchHit[] { hit1, hit2 }, new TotalHits(1, TotalHits.Relation.EQUAL_TO), 1.0F),
@@ -225,9 +225,9 @@ public class ExpandSearchPhaseTests extends ESTestCase {
 };
 SearchHit hit1 = new SearchHit(1, "ID");
-hit1.setDocumentField("someField", new DocumentField("someField", Collections.singletonList(null)));
+hit1.setDocumentField(new DocumentField("someField", Collections.singletonList(null)));
 SearchHit hit2 = new SearchHit(2, "ID2");
-hit2.setDocumentField("someField", new DocumentField("someField", Collections.singletonList(null)));
+hit2.setDocumentField(new DocumentField("someField", Collections.singletonList(null)));
 ExpandSearchPhase phase = newExpandSearchPhase(
 mockSearchPhaseContext,
 new SearchResponseSections(
@@ -313,7 +313,7 @@ public class ExpandSearchPhaseTests extends ESTestCase {
 .routing("baz");
 SearchHit hit = new SearchHit(1, "ID");
-hit.setDocumentField("someField", new DocumentField("someField", Collections.singletonList("foo")));
+hit.setDocumentField(new DocumentField("someField", Collections.singletonList("foo")));
 try (
 SearchResponseSections searchResponseSections = new SearchResponseSections(
 new SearchHits(new SearchHit[] { hit }, new TotalHits(1, TotalHits.Relation.EQUAL_TO), 1.0F),
@@ -384,7 +384,7 @@ public class ExpandSearchPhaseTests extends ESTestCase {
 .routing("baz");
 SearchHit hit = new SearchHit(1, "ID");
-hit.setDocumentField("someField", new DocumentField("someField", Collections.singletonList("foo")));
+hit.setDocumentField(new DocumentField("someField", Collections.singletonList("foo")));
 try (
 SearchResponseSections searchResponseSections = new SearchResponseSections(
 new SearchHits(new SearchHit[] { hit }, new TotalHits(1, TotalHits.Relation.EQUAL_TO), 1.0F),


@@ -108,7 +108,7 @@ public class FetchLookupFieldsPhaseTests extends ESTestCase {
 final SearchHits searchHits;
 if (fields != null) {
 final SearchHit hit = new SearchHit(randomInt(1000));
-fields.forEach((f, values) -> hit.setDocumentField(f, new DocumentField(f, values, List.of())));
+fields.forEach((f, values) -> hit.setDocumentField(new DocumentField(f, values, List.of())));
 searchHits = new SearchHits(new SearchHit[] { hit }, new TotalHits(1, TotalHits.Relation.EQUAL_TO), 1.0f);
 } else {
 searchHits = SearchHits.empty(new TotalHits(0, TotalHits.Relation.EQUAL_TO), 1.0f);
@@ -143,7 +143,6 @@ public class FetchLookupFieldsPhaseTests extends ESTestCase {
 final List<FieldAndFormat> fetchFields = List.of(new FieldAndFormat(randomAlphaOfLength(10), null));
 {
 leftHit0.setDocumentField(
-"lookup_field_1",
 new DocumentField(
 "lookup_field_1",
 List.of(),
@@ -155,7 +154,6 @@ public class FetchLookupFieldsPhaseTests extends ESTestCase {
 )
 );
 leftHit0.setDocumentField(
-"lookup_field_2",
 new DocumentField(
 "lookup_field_2",
 List.of(),
@@ -168,7 +166,6 @@ public class FetchLookupFieldsPhaseTests extends ESTestCase {
 SearchHit leftHit1 = new SearchHit(randomInt(100));
 {
 leftHit1.setDocumentField(
-"lookup_field_2",
 new DocumentField(
 "lookup_field_2",
 List.of(),
@@ -180,7 +177,6 @@ public class FetchLookupFieldsPhaseTests extends ESTestCase {
 )
 );
 leftHit1.setDocumentField(
-"lookup_field_3",
 new DocumentField(
 "lookup_field_3",
 List.of(),


@@ -967,7 +967,7 @@ public class RankFeaturePhaseTests extends ESTestCase {
 searchHits[i] = SearchHit.unpooled(scoreDocs[i].doc);
 searchHits[i].shard(shardTarget);
 searchHits[i].score(scoreDocs[i].score);
-searchHits[i].setDocumentField(DEFAULT_FIELD, new DocumentField(DEFAULT_FIELD, Collections.singletonList(scoreDocs[i].doc)));
+searchHits[i].setDocumentField(new DocumentField(DEFAULT_FIELD, Collections.singletonList(scoreDocs[i].doc)));
 if (scoreDocs[i].score > maxScore) {
 maxScore = scoreDocs[i].score;
 }


@@ -297,16 +297,13 @@ public class RankFeatureShardPhaseTests extends ESTestCase {
 searchContext.addFetchResult();
 SearchHit[] hits = new SearchHit[3];
 hits[0] = SearchHit.unpooled(4);
-hits[0].setDocumentField(fieldName, new DocumentField(fieldName, Collections.singletonList(expectedFieldData.get(4))));
+hits[0].setDocumentField(new DocumentField(fieldName, Collections.singletonList(expectedFieldData.get(4))));
 hits[1] = SearchHit.unpooled(9);
-hits[1].setDocumentField(fieldName, new DocumentField(fieldName, Collections.singletonList(expectedFieldData.get(9))));
+hits[1].setDocumentField(new DocumentField(fieldName, Collections.singletonList(expectedFieldData.get(9))));
 hits[2] = SearchHit.unpooled(numDocs - 1);
-hits[2].setDocumentField(
-fieldName,
-new DocumentField(fieldName, Collections.singletonList(expectedFieldData.get(numDocs - 1)))
-);
+hits[2].setDocumentField(new DocumentField(fieldName, Collections.singletonList(expectedFieldData.get(numDocs - 1))));
 searchHits = SearchHits.unpooled(hits, new TotalHits(3, TotalHits.Relation.EQUAL_TO), 1.0f);
 searchContext.fetchResult().shardResult(searchHits, null);
 when(searchContext.isCancelled()).thenReturn(false);


@@ -30,7 +30,7 @@ public class SearchHitBuilder {
 }
 public SearchHitBuilder addField(String name, List<Object> values) {
-hit.setDocumentField(name, new DocumentField(name, values));
+hit.setDocumentField(new DocumentField(name, values));
 return this;
 }


@@ -83,7 +83,7 @@ public class ComputingExtractorTests extends AbstractSqlWireSerializingTestCase<
 double expected = Math.log(value);
 DocumentField field = new DocumentField(fieldName, singletonList(value));
 SearchHit hit = SearchHit.unpooled(1, null);
-hit.setDocumentField(fieldName, field);
+hit.setDocumentField(field);
 assertEquals(expected, extractor.process(hit));
 }
 }


@@ -95,7 +95,7 @@ public class FieldHitExtractorTests extends AbstractSqlWireSerializingTestCase<F
 DocumentField field = new DocumentField(fieldName, documentFieldValues);
 SearchHit hit = SearchHit.unpooled(1, null);
-hit.setDocumentField(fieldName, field);
+hit.setDocumentField(field);
 Object result = documentFieldValues.isEmpty() ? null : documentFieldValues.get(0);
 assertEquals(result, extractor.extract(hit));
 }
@@ -113,7 +113,7 @@ public class FieldHitExtractorTests extends AbstractSqlWireSerializingTestCase<F
 }
 DocumentField field = new DocumentField(fieldName, documentFieldValues);
 SearchHit hit = SearchHit.unpooled(1, null);
-hit.setDocumentField(fieldName, field);
+hit.setDocumentField(field);
 Object result = documentFieldValues.isEmpty() ? null : documentFieldValues.get(0);
 assertEquals(result, extractor.extract(hit));
 }
@@ -128,7 +128,7 @@ public class FieldHitExtractorTests extends AbstractSqlWireSerializingTestCase<F
 List<Object> documentFieldValues = Collections.singletonList(StringUtils.toString(zdt));
 DocumentField field = new DocumentField("my_date_nanos_field", documentFieldValues);
 SearchHit hit = SearchHit.unpooled(1, null);
-hit.setDocumentField("my_date_nanos_field", field);
+hit.setDocumentField(field);
 FieldHitExtractor extractor = new FieldHitExtractor("my_date_nanos_field", DATETIME, zoneId, LENIENT);
 assertEquals(zdt, extractor.extract(hit));
 }
@@ -145,7 +145,7 @@ public class FieldHitExtractorTests extends AbstractSqlWireSerializingTestCase<F
 FieldHitExtractor fe = getFieldHitExtractor(fieldName);
 DocumentField field = new DocumentField(fieldName, asList("a", "b"));
 SearchHit hit = SearchHit.unpooled(1, null);
-hit.setDocumentField(fieldName, field);
+hit.setDocumentField(field);
 Exception ex = expectThrows(InvalidArgumentException.class, () -> fe.extract(hit));
 assertThat(ex.getMessage(), is("Arrays (returned by [" + fieldName + "]) are not supported"));
 }
@@ -155,7 +155,7 @@ public class FieldHitExtractorTests extends AbstractSqlWireSerializingTestCase<F
 Object value = randomValue();
 DocumentField field = new DocumentField("a.b.c", singletonList(value));
 SearchHit hit = SearchHit.unpooled(1, null, null);
-hit.setDocumentField("a.b.c", field);
+hit.setDocumentField(field);
 assertThat(fe.extract(hit), is(value));
 }
@@ -164,7 +164,7 @@ public class FieldHitExtractorTests extends AbstractSqlWireSerializingTestCase<F
 Object value = randomValue();
 DocumentField field = new DocumentField("a", asList(value, value));
 SearchHit hit = SearchHit.unpooled(1, null);
-hit.setDocumentField("a", field);
+hit.setDocumentField(field);
 Exception ex = expectThrows(InvalidArgumentException.class, () -> fe.extract(hit));
 assertThat(ex.getMessage(), is("Arrays (returned by [a]) are not supported"));
 }
@@ -175,7 +175,7 @@ public class FieldHitExtractorTests extends AbstractSqlWireSerializingTestCase<F
 Object valueB = randomValue();
 DocumentField field = new DocumentField("a", asList(valueA, valueB));
 SearchHit hit = SearchHit.unpooled(1, null);
-hit.setDocumentField("a", field);
+hit.setDocumentField(field);
 assertEquals(valueA, fe.extract(hit));
 }
@@ -188,7 +188,7 @@ public class FieldHitExtractorTests extends AbstractSqlWireSerializingTestCase<F
 map.put("type", "Point");
 DocumentField field = new DocumentField(fieldName, singletonList(map));
 SearchHit hit = SearchHit.unpooled(1, null);
-hit.setDocumentField(fieldName, field);
+hit.setDocumentField(field);
 assertEquals(new GeoShape(1, 2), fe.extract(hit));
 }
@@ -205,14 +205,14 @@ public class FieldHitExtractorTests extends AbstractSqlWireSerializingTestCase<F
 map2.put("type", "Point");
 DocumentField field = new DocumentField(fieldName, asList(map1, map2));
 SearchHit hit = SearchHit.unpooled(1, null);
-hit.setDocumentField(fieldName, field);
+hit.setDocumentField(field);
 Exception ex = expectThrows(InvalidArgumentException.class, () -> fe.extract(hit));
 assertThat(ex.getMessage(), is("Arrays (returned by [" + fieldName + "]) are not supported"));
 FieldHitExtractor lenientFe = new FieldHitExtractor(fieldName, randomBoolean() ? GEO_SHAPE : SHAPE, UTC, LENIENT);
 SearchHit searchHit = SearchHit.unpooled(1, "1");
-searchHit.setDocumentField(fieldName, new DocumentField(fieldName, singletonList(map2)));
+searchHit.setDocumentField(new DocumentField(fieldName, singletonList(map2)));
 assertEquals(new GeoShape(3, 4), lenientFe.extract(searchHit));
 }
@@ -224,7 +224,7 @@ public class FieldHitExtractorTests extends AbstractSqlWireSerializingTestCase<F
 String fieldName = randomAlphaOfLength(10);
 DocumentField field = new DocumentField(fieldName, singletonList(value));
 SearchHit hit = SearchHit.unpooled(1, null);
-hit.setDocumentField(fieldName, field);
+hit.setDocumentField(field);
 FieldHitExtractor fe = new FieldHitExtractor(fieldName, UNSIGNED_LONG, randomZone(), randomBoolean() ? NONE : LENIENT);
 assertEquals(bi, fe.extract(hit));
@@ -238,7 +238,7 @@ public class FieldHitExtractorTests extends AbstractSqlWireSerializingTestCase<F
 String fieldName = randomAlphaOfLength(10);
 DocumentField field = new DocumentField(fieldName, singletonList(value));
 SearchHit hit = SearchHit.unpooled(1, null);
-hit.setDocumentField(fieldName, field);
+hit.setDocumentField(field);
 FieldHitExtractor fe = new FieldHitExtractor(fieldName, VERSION, randomZone(), randomBoolean() ? NONE : LENIENT);
 assertEquals(version.toString(), fe.extract(hit).toString());