Synthetic _source: support histogram field (#89833)

Adds support for the `histogram` field type to synthetic _source.
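
A minimal sketch of what this enables (the index/field names are just the ones used in the new YAML test, not a required API): with the index's `_source` in `synthetic` mode, a `histogram` field is rebuilt from its binary doc values at fetch time, so a GET still returns the `values`/`counts` arrays even though no `_source` was stored.

```console
PUT histo_synthetic
{
  "mappings": {
    "_source": { "mode": "synthetic" },
    "properties": {
      "latency": { "type": "histogram" }
    }
  }
}

PUT histo_synthetic/_doc/1?refresh=true
{
  "latency": { "values": [0.1, 0.2, 0.3, 0.4, 0.5], "counts": [3, 7, 23, 12, 6] }
}

GET histo_synthetic/_doc/1
```

The fetched `_source` for document 1 contains the same single `latency` object with its `values` and `counts`, reconstructed from doc values.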

![image](https://user-images.githubusercontent.com/215970/188691249-9d23d1dc-64ab-49a4-8b24-f60fc966c0ac.png)
Nik Everett 2022-09-07 12:25:38 -04:00 committed by GitHub
parent f8d1d2afa6
commit b667aa33f0
7 changed files with 213 additions and 3 deletions


@@ -0,0 +1,5 @@
pr: 89833
summary: "Synthetic _source: support histogram field"
area: TSDB
type: enhancement
issues: []


@@ -35,6 +35,7 @@ types:
** <<numeric-synthetic-source,`float`>>
** <<geo-point-synthetic-source,`geo_point`>>
** <<numeric-synthetic-source,`half_float`>>
** <<histogram-synthetic-source,`histogram`>>
** <<numeric-synthetic-source,`integer`>>
** <<ip-synthetic-source,`ip`>>
** <<keyword-synthetic-source,`keyword`>>


@@ -85,6 +85,12 @@ The histogram field is "algorithm agnostic" and does not store data specific to
means the field can technically be aggregated with either algorithm, in practice the user should choose one algorithm and
index data in that manner (e.g. centroids for T-Digest or intervals for HDRHistogram) to ensure best accuracy.

[[histogram-synthetic-source]]
==== Synthetic `_source`
preview::[]

`histogram` fields support <<synthetic-source,synthetic `_source`>> in their
default configuration. Synthetic `_source` cannot be used together with
<<ignore-malformed,`ignore_malformed`>> or <<copy-to,`copy_to`>>.
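
For example, the following request creates an index where synthetic `_source` is enabled and a `histogram` field is mapped (the index and field names here are illustrative, not part of this change):

[source,console]
----
PUT idx
{
  "mappings": {
    "_source": { "mode": "synthetic" },
    "properties": {
      "my_histogram": { "type": "histogram" }
    }
  }
}
----

Fetched documents then return each `histogram` field as a single object whose `values` and `counts` arrays are rebuilt from doc values rather than read from a stored `_source`.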

[[histogram-ex]]
==== Examples


@@ -13,6 +13,7 @@ import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.search.FieldExistsQuery;

@@ -71,6 +72,7 @@ import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.matchesPattern;
import static org.hamcrest.Matchers.nullValue;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
@@ -910,6 +912,32 @@ public abstract class MapperTestCase extends MapperServiceTestCase {
        assertThat(syntheticSource(mapper, b -> b.startArray("field").endArray()), equalTo("{}"));
    }

    public final void testSyntheticEmptyListNoDocValuesLoader() throws IOException {
        assumeTrue("Field does not support [] as input", supportsEmptyInputArray());
        SyntheticSourceExample syntheticSourceExample = syntheticSourceSupport().example(5);
        DocumentMapper mapper = createDocumentMapper(syntheticSourceMapping(b -> {
            b.startObject("field");
            syntheticSourceExample.mapping().accept(b);
            b.endObject();
        }));
        try (Directory directory = newDirectory()) {
            RandomIndexWriter iw = new RandomIndexWriter(random(), directory);
            LuceneDocument doc = mapper.parse(source(b -> b.startArray("field").endArray())).rootDoc();
            iw.addDocument(doc);
            iw.close();
            try (DirectoryReader reader = DirectoryReader.open(directory)) {
                LeafReader leafReader = getOnlyLeafReader(reader);
                SourceLoader.SyntheticFieldLoader fieldLoader = mapper.mapping().getRoot().getMapper("field").syntheticFieldLoader();
                /*
                 * null means "there are no values for this field, don't call me".
                 * Empty fields are common enough that we need to make sure this
                 * optimization kicks in.
                 */
                assertThat(fieldLoader.docValuesLoader(leafReader, new int[] { 0 }), nullValue());
            }
        }
    }

    public final void testSyntheticSourceInvalid() throws IOException {
        List<SyntheticSourceInvalidExample> examples = new ArrayList<>(syntheticSourceSupport().invalidExample());
        if (supportsCopyTo()) {


@@ -10,6 +10,7 @@ import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SortField;

@@ -31,6 +32,7 @@ import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MapperBuilderContext;
import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.mapper.SourceLoader;
import org.elasticsearch.index.mapper.SourceValueFetcher;
import org.elasticsearch.index.mapper.TextSearchInfo;
import org.elasticsearch.index.mapper.TimeSeriesParams;

@@ -42,6 +44,7 @@ import org.elasticsearch.search.MultiValueMode;
import org.elasticsearch.search.sort.BucketedSort;
import org.elasticsearch.search.sort.SortOrder;
import org.elasticsearch.xcontent.ParseField;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.xcontent.XContentSubParser;
import org.elasticsearch.xpack.analytics.aggregations.support.AnalyticsValuesSourceType;

@@ -49,6 +52,7 @@ import org.elasticsearch.xpack.analytics.aggregations.support.AnalyticsValuesSourceType;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Map;
import java.util.stream.Stream;

import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken;
@@ -447,4 +451,74 @@ public class HistogramFieldMapper extends FieldMapper {
            return count;
        }
    }

    @Override
    public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() {
        if (ignoreMalformed.value()) {
            throw new IllegalArgumentException(
                "field [" + name() + "] of type [histogram] doesn't support synthetic source because it ignores malformed histograms"
            );
        }
        if (copyTo.copyToFields().isEmpty() != true) {
            throw new IllegalArgumentException(
                "field [" + name() + "] of type [histogram] doesn't support synthetic source because it declares copy_to"
            );
        }
        return new SourceLoader.SyntheticFieldLoader() {
            private final InternalHistogramValue value = new InternalHistogramValue();
            private BytesRef binaryValue;

            @Override
            public Stream<Map.Entry<String, StoredFieldLoader>> storedFieldLoaders() {
                return Stream.of();
            }

            @Override
            public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException {
                BinaryDocValues docValues = leafReader.getBinaryDocValues(fieldType().name());
                if (docValues == null) {
                    // No values in this leaf
                    binaryValue = null;
                    return null;
                }
                return docId -> {
                    if (docValues.advanceExact(docId)) {
                        binaryValue = docValues.binaryValue();
                        return true;
                    }
                    binaryValue = null;
                    return false;
                };
            }

            @Override
            public boolean hasValue() {
                return binaryValue != null;
            }

            @Override
            public void write(XContentBuilder b) throws IOException {
                if (binaryValue == null) {
                    return;
                }
                b.startObject(simpleName());
                value.reset(binaryValue);
                b.startArray("values");
                while (value.next()) {
                    b.value(value.value());
                }
                b.endArray();
                value.reset(binaryValue);
                b.startArray("counts");
                while (value.next()) {
                    b.value(value.count());
                }
                b.endArray();
                b.endObject();
            }
        };
    }
}


@@ -19,11 +19,14 @@ import org.elasticsearch.xpack.analytics.AnalyticsPlugin;
import org.junit.AssumptionViolatedException;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.matchesPattern;
import static org.hamcrest.Matchers.notNullValue;
import static org.hamcrest.Matchers.nullValue;
@@ -347,12 +350,59 @@ public class HistogramFieldMapperTests extends MapperTestCase {
    }

    @Override
    protected IngestScriptSupport ingestScriptSupport() {
        throw new AssumptionViolatedException("not supported");
    }

    @Override
    protected SyntheticSourceSupport syntheticSourceSupport() {
        return new HistogramFieldSyntheticSourceSupport();
    }

    private static class HistogramFieldSyntheticSourceSupport implements SyntheticSourceSupport {
        @Override
        public SyntheticSourceExample example(int maxVals) {
            if (randomBoolean()) {
                Map<String, Object> value = new LinkedHashMap<>();
                value.put("values", List.of(randomDouble()));
                value.put("counts", List.of(randomCount()));
                return new SyntheticSourceExample(value, value, this::mapping);
            }
            int size = between(1, maxVals);
            List<Double> values = new ArrayList<>(size);
            double prev = randomDouble();
            values.add(prev);
            while (values.size() < size && prev != Double.MAX_VALUE) {
                prev = randomDoubleBetween(prev, Double.MAX_VALUE, false);
                values.add(prev);
            }
            Map<String, Object> value = new LinkedHashMap<>();
            value.put("values", values);
            value.put("counts", randomList(values.size(), values.size(), this::randomCount));
            return new SyntheticSourceExample(value, value, this::mapping);
        }

        private int randomCount() {
            return between(1, Integer.MAX_VALUE);
        }

        private void mapping(XContentBuilder b) throws IOException {
            b.field("type", "histogram");
        }

        @Override
        public List<SyntheticSourceInvalidExample> invalidExample() throws IOException {
            return List.of(
                new SyntheticSourceInvalidExample(
                    matchesPattern(
                        "field \\[field] of type \\[histogram] doesn't support synthetic source because it ignores malformed histograms"
                    ),
                    b -> {
                        b.field("type", "histogram");
                        b.field("ignore_malformed", true);
                    }
                )
            );
        }
    }
}


@@ -249,3 +249,49 @@ histogram with wrong time series mappings:
              latency:
                type: histogram
                time_series_metric: counter
---
histogram with synthetic source:
  - skip:
      version: " - 8.4.99"
      reason: introduced in 8.5.0
  - do:
      indices.create:
        index: histo_synthetic
        body:
          mappings:
            _source:
              mode: synthetic
            properties:
              latency:
                type: histogram
  - do:
      bulk:
        index: histo_synthetic
        refresh: true
        body:
          - '{"index": {"_id": 1}}'
          - '{"latency": {"values" : [0.1, 0.2, 0.3, 0.4, 0.5], "counts" : [3, 7, 23, 12, 6]}}'
          - '{"index": {"_id": 2}}'
          - '{"latency": {"values" : [0, 0.1, 0.2, 0.3, 0.4, 0.5], "counts" : [3, 2, 5, 10, 1, 8]}}'
  - do:
      get:
        index: histo_synthetic
        id: 1
  - match:
      _source:
        latency:
          values: [0.1, 0.2, 0.3, 0.4, 0.5]
          counts: [3, 7, 23, 12, 6]
  - do:
      get:
        index: histo_synthetic
        id: 2
  - match:
      _source:
        latency:
          values: [0.0, 0.1, 0.2, 0.3, 0.4, 0.5]
          counts: [3, 2, 5, 10, 1, 8]