mirror of
https://github.com/elastic/elasticsearch.git
synced 2025-06-29 01:44:36 -04:00
Synthetic _source: support histogram field (#89833)
Adds support for the `histogram` field type to synthetic _source. 
This commit is contained in:
parent
f8d1d2afa6
commit
b667aa33f0
7 changed files with 213 additions and 3 deletions
5
docs/changelog/89833.yaml
Normal file
5
docs/changelog/89833.yaml
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
pr: 89833
|
||||||
|
summary: "Synthetic _source: support histogram field"
|
||||||
|
area: TSDB
|
||||||
|
type: enhancement
|
||||||
|
issues: []
|
|
@ -35,6 +35,7 @@ types:
|
||||||
** <<numeric-synthetic-source,`float`>>
|
** <<numeric-synthetic-source,`float`>>
|
||||||
** <<geo-point-synthetic-source,`geo_point`>>
|
** <<geo-point-synthetic-source,`geo_point`>>
|
||||||
** <<numeric-synthetic-source,`half_float`>>
|
** <<numeric-synthetic-source,`half_float`>>
|
||||||
|
** <<histogram-synthetic-source,`histogram`>>
|
||||||
** <<numeric-synthetic-source,`integer`>>
|
** <<numeric-synthetic-source,`integer`>>
|
||||||
** <<ip-synthetic-source,`ip`>>
|
** <<ip-synthetic-source,`ip`>>
|
||||||
** <<keyword-synthetic-source,`keyword`>>
|
** <<keyword-synthetic-source,`keyword`>>
|
||||||
|
|
|
@ -85,6 +85,12 @@ The histogram field is "algorithm agnostic" and does not store data specific to
|
||||||
means the field can technically be aggregated with either algorithm, in practice the user should choose one algorithm and
|
means the field can technically be aggregated with either algorithm, in practice the user should choose one algorithm and
|
||||||
index data in that manner (e.g. centroids for T-Digest or intervals for HDRHistogram) to ensure best accuracy.
|
index data in that manner (e.g. centroids for T-Digest or intervals for HDRHistogram) to ensure best accuracy.
|
||||||
|
|
||||||
|
[[histogram-synthetic-source]]
|
||||||
|
==== Synthetic source preview:[]
|
||||||
|
`histogram` fields support <<synthetic-source,synthetic `_source`>> in their
|
||||||
|
default configuration. Synthetic `_source` cannot be used together with
|
||||||
|
<<ignore-malformed,`ignore_malformed`>> or <<copy-to,`copy_to`>>.
|
||||||
|
|
||||||
[[histogram-ex]]
|
[[histogram-ex]]
|
||||||
==== Examples
|
==== Examples
|
||||||
|
|
||||||
|
|
|
@ -13,6 +13,7 @@ import org.apache.lucene.index.DocValuesType;
|
||||||
import org.apache.lucene.index.IndexOptions;
|
import org.apache.lucene.index.IndexOptions;
|
||||||
import org.apache.lucene.index.IndexableField;
|
import org.apache.lucene.index.IndexableField;
|
||||||
import org.apache.lucene.index.IndexableFieldType;
|
import org.apache.lucene.index.IndexableFieldType;
|
||||||
|
import org.apache.lucene.index.LeafReader;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.index.NoMergePolicy;
|
import org.apache.lucene.index.NoMergePolicy;
|
||||||
import org.apache.lucene.search.FieldExistsQuery;
|
import org.apache.lucene.search.FieldExistsQuery;
|
||||||
|
@ -71,6 +72,7 @@ import static org.hamcrest.Matchers.containsString;
|
||||||
import static org.hamcrest.Matchers.equalTo;
|
import static org.hamcrest.Matchers.equalTo;
|
||||||
import static org.hamcrest.Matchers.instanceOf;
|
import static org.hamcrest.Matchers.instanceOf;
|
||||||
import static org.hamcrest.Matchers.matchesPattern;
|
import static org.hamcrest.Matchers.matchesPattern;
|
||||||
|
import static org.hamcrest.Matchers.nullValue;
|
||||||
import static org.mockito.Mockito.mock;
|
import static org.mockito.Mockito.mock;
|
||||||
import static org.mockito.Mockito.when;
|
import static org.mockito.Mockito.when;
|
||||||
|
|
||||||
|
@ -910,6 +912,32 @@ public abstract class MapperTestCase extends MapperServiceTestCase {
|
||||||
assertThat(syntheticSource(mapper, b -> b.startArray("field").endArray()), equalTo("{}"));
|
assertThat(syntheticSource(mapper, b -> b.startArray("field").endArray()), equalTo("{}"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public final void testSyntheticEmptyListNoDocValuesLoader() throws IOException {
|
||||||
|
assumeTrue("Field does not support [] as input", supportsEmptyInputArray());
|
||||||
|
SyntheticSourceExample syntheticSourceExample = syntheticSourceSupport().example(5);
|
||||||
|
DocumentMapper mapper = createDocumentMapper(syntheticSourceMapping(b -> {
|
||||||
|
b.startObject("field");
|
||||||
|
syntheticSourceExample.mapping().accept(b);
|
||||||
|
b.endObject();
|
||||||
|
}));
|
||||||
|
try (Directory directory = newDirectory()) {
|
||||||
|
RandomIndexWriter iw = new RandomIndexWriter(random(), directory);
|
||||||
|
LuceneDocument doc = mapper.parse(source(b -> b.startArray("field").endArray())).rootDoc();
|
||||||
|
iw.addDocument(doc);
|
||||||
|
iw.close();
|
||||||
|
try (DirectoryReader reader = DirectoryReader.open(directory)) {
|
||||||
|
LeafReader leafReader = getOnlyLeafReader(reader);
|
||||||
|
SourceLoader.SyntheticFieldLoader fieldLoader = mapper.mapping().getRoot().getMapper("field").syntheticFieldLoader();
|
||||||
|
/*
|
||||||
|
* null means "there are no values for this field, don't call me".
|
||||||
|
* Empty fields are common enough that we need to make sure this
|
||||||
|
* optimization kicks in.
|
||||||
|
*/
|
||||||
|
assertThat(fieldLoader.docValuesLoader(leafReader, new int[] { 0 }), nullValue());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public final void testSyntheticSourceInvalid() throws IOException {
|
public final void testSyntheticSourceInvalid() throws IOException {
|
||||||
List<SyntheticSourceInvalidExample> examples = new ArrayList<>(syntheticSourceSupport().invalidExample());
|
List<SyntheticSourceInvalidExample> examples = new ArrayList<>(syntheticSourceSupport().invalidExample());
|
||||||
if (supportsCopyTo()) {
|
if (supportsCopyTo()) {
|
||||||
|
|
|
@ -10,6 +10,7 @@ import org.apache.lucene.document.BinaryDocValuesField;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.index.BinaryDocValues;
|
import org.apache.lucene.index.BinaryDocValues;
|
||||||
import org.apache.lucene.index.DocValues;
|
import org.apache.lucene.index.DocValues;
|
||||||
|
import org.apache.lucene.index.LeafReader;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.SortField;
|
import org.apache.lucene.search.SortField;
|
||||||
|
@ -31,6 +32,7 @@ import org.elasticsearch.index.mapper.FieldMapper;
|
||||||
import org.elasticsearch.index.mapper.MappedFieldType;
|
import org.elasticsearch.index.mapper.MappedFieldType;
|
||||||
import org.elasticsearch.index.mapper.MapperBuilderContext;
|
import org.elasticsearch.index.mapper.MapperBuilderContext;
|
||||||
import org.elasticsearch.index.mapper.MapperParsingException;
|
import org.elasticsearch.index.mapper.MapperParsingException;
|
||||||
|
import org.elasticsearch.index.mapper.SourceLoader;
|
||||||
import org.elasticsearch.index.mapper.SourceValueFetcher;
|
import org.elasticsearch.index.mapper.SourceValueFetcher;
|
||||||
import org.elasticsearch.index.mapper.TextSearchInfo;
|
import org.elasticsearch.index.mapper.TextSearchInfo;
|
||||||
import org.elasticsearch.index.mapper.TimeSeriesParams;
|
import org.elasticsearch.index.mapper.TimeSeriesParams;
|
||||||
|
@ -42,6 +44,7 @@ import org.elasticsearch.search.MultiValueMode;
|
||||||
import org.elasticsearch.search.sort.BucketedSort;
|
import org.elasticsearch.search.sort.BucketedSort;
|
||||||
import org.elasticsearch.search.sort.SortOrder;
|
import org.elasticsearch.search.sort.SortOrder;
|
||||||
import org.elasticsearch.xcontent.ParseField;
|
import org.elasticsearch.xcontent.ParseField;
|
||||||
|
import org.elasticsearch.xcontent.XContentBuilder;
|
||||||
import org.elasticsearch.xcontent.XContentParser;
|
import org.elasticsearch.xcontent.XContentParser;
|
||||||
import org.elasticsearch.xcontent.XContentSubParser;
|
import org.elasticsearch.xcontent.XContentSubParser;
|
||||||
import org.elasticsearch.xpack.analytics.aggregations.support.AnalyticsValuesSourceType;
|
import org.elasticsearch.xpack.analytics.aggregations.support.AnalyticsValuesSourceType;
|
||||||
|
@ -49,6 +52,7 @@ import org.elasticsearch.xpack.analytics.aggregations.support.AnalyticsValuesSou
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken;
|
import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken;
|
||||||
|
|
||||||
|
@ -447,4 +451,74 @@ public class HistogramFieldMapper extends FieldMapper {
|
||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() {
|
||||||
|
if (ignoreMalformed.value()) {
|
||||||
|
throw new IllegalArgumentException(
|
||||||
|
"field [" + name() + "] of type [histogram] doesn't support synthetic source because it ignores malformed histograms"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if (copyTo.copyToFields().isEmpty() != true) {
|
||||||
|
throw new IllegalArgumentException(
|
||||||
|
"field [" + name() + "] of type [histogram] doesn't support synthetic source because it declares copy_to"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
return new SourceLoader.SyntheticFieldLoader() {
|
||||||
|
private final InternalHistogramValue value = new InternalHistogramValue();
|
||||||
|
private BytesRef binaryValue;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Stream<Map.Entry<String, StoredFieldLoader>> storedFieldLoaders() {
|
||||||
|
return Stream.of();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException {
|
||||||
|
BinaryDocValues docValues = leafReader.getBinaryDocValues(fieldType().name());
|
||||||
|
if (docValues == null) {
|
||||||
|
// No values in this leaf
|
||||||
|
binaryValue = null;
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return docId -> {
|
||||||
|
if (docValues.advanceExact(docId)) {
|
||||||
|
binaryValue = docValues.binaryValue();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
binaryValue = null;
|
||||||
|
return false;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasValue() {
|
||||||
|
return binaryValue != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void write(XContentBuilder b) throws IOException {
|
||||||
|
if (binaryValue == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
b.startObject(simpleName());
|
||||||
|
|
||||||
|
value.reset(binaryValue);
|
||||||
|
b.startArray("values");
|
||||||
|
while (value.next()) {
|
||||||
|
b.value(value.value());
|
||||||
|
}
|
||||||
|
b.endArray();
|
||||||
|
|
||||||
|
value.reset(binaryValue);
|
||||||
|
b.startArray("counts");
|
||||||
|
while (value.next()) {
|
||||||
|
b.value(value.count());
|
||||||
|
}
|
||||||
|
b.endArray();
|
||||||
|
|
||||||
|
b.endObject();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,11 +19,14 @@ import org.elasticsearch.xpack.analytics.AnalyticsPlugin;
|
||||||
import org.junit.AssumptionViolatedException;
|
import org.junit.AssumptionViolatedException;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import static org.hamcrest.Matchers.containsString;
|
import static org.hamcrest.Matchers.containsString;
|
||||||
|
import static org.hamcrest.Matchers.matchesPattern;
|
||||||
import static org.hamcrest.Matchers.notNullValue;
|
import static org.hamcrest.Matchers.notNullValue;
|
||||||
import static org.hamcrest.Matchers.nullValue;
|
import static org.hamcrest.Matchers.nullValue;
|
||||||
|
|
||||||
|
@ -347,12 +350,59 @@ public class HistogramFieldMapperTests extends MapperTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected SyntheticSourceSupport syntheticSourceSupport() {
|
protected IngestScriptSupport ingestScriptSupport() {
|
||||||
throw new AssumptionViolatedException("not supported");
|
throw new AssumptionViolatedException("not supported");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected IngestScriptSupport ingestScriptSupport() {
|
protected SyntheticSourceSupport syntheticSourceSupport() {
|
||||||
throw new AssumptionViolatedException("not supported");
|
return new HistogramFieldSyntheticSourceSupport();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class HistogramFieldSyntheticSourceSupport implements SyntheticSourceSupport {
|
||||||
|
@Override
|
||||||
|
public SyntheticSourceExample example(int maxVals) {
|
||||||
|
if (randomBoolean()) {
|
||||||
|
Map<String, Object> value = new LinkedHashMap<>();
|
||||||
|
value.put("values", List.of(randomDouble()));
|
||||||
|
value.put("counts", List.of(randomCount()));
|
||||||
|
return new SyntheticSourceExample(value, value, this::mapping);
|
||||||
|
}
|
||||||
|
int size = between(1, maxVals);
|
||||||
|
List<Double> values = new ArrayList<>(size);
|
||||||
|
double prev = randomDouble();
|
||||||
|
values.add(prev);
|
||||||
|
while (values.size() < size && prev != Double.MAX_VALUE) {
|
||||||
|
prev = randomDoubleBetween(prev, Double.MAX_VALUE, false);
|
||||||
|
values.add(prev);
|
||||||
|
}
|
||||||
|
Map<String, Object> value = new LinkedHashMap<>();
|
||||||
|
value.put("values", values);
|
||||||
|
value.put("counts", randomList(values.size(), values.size(), this::randomCount));
|
||||||
|
return new SyntheticSourceExample(value, value, this::mapping);
|
||||||
|
}
|
||||||
|
|
||||||
|
private int randomCount() {
|
||||||
|
return between(1, Integer.MAX_VALUE);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void mapping(XContentBuilder b) throws IOException {
|
||||||
|
b.field("type", "histogram");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<SyntheticSourceInvalidExample> invalidExample() throws IOException {
|
||||||
|
return List.of(
|
||||||
|
new SyntheticSourceInvalidExample(
|
||||||
|
matchesPattern(
|
||||||
|
"field \\[field] of type \\[histogram] doesn't support synthetic source because it ignores malformed histograms"
|
||||||
|
),
|
||||||
|
b -> {
|
||||||
|
b.field("type", "histogram");
|
||||||
|
b.field("ignore_malformed", true);
|
||||||
|
}
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -249,3 +249,49 @@ histogram with wrong time series mappings:
|
||||||
latency:
|
latency:
|
||||||
type: histogram
|
type: histogram
|
||||||
time_series_metric: counter
|
time_series_metric: counter
|
||||||
|
|
||||||
|
---
|
||||||
|
histogram with synthetic source:
  # Skipped on every version up to and including 8.4.99.
  - skip:
      version: " - 8.4.99"
      reason: introduced in 8.5.0

  # Index whose _source is synthetic and whose only mapped field is a histogram.
  - do:
      indices.create:
        index: histo_synthetic
        body:
          mappings:
            _source:
              mode: synthetic
            properties:
              latency:
                type: histogram

  # Two documents; the second one starts its values with the integer literal 0.
  - do:
      bulk:
        index: histo_synthetic
        refresh: true
        body:
          - '{"index": {"_id": 1}}'
          - '{"latency": {"values" : [0.1, 0.2, 0.3, 0.4, 0.5], "counts" : [3, 7, 23, 12, 6]}}'
          - '{"index": {"_id": 2}}'
          - '{"latency": {"values" : [0, 0.1, 0.2, 0.3, 0.4, 0.5], "counts" : [3, 2, 5, 10, 1, 8]}}'

  # Document 1 is returned exactly as it was indexed.
  - do:
      get:
        index: histo_synthetic
        id: 1
  - match:
      _source:
        latency:
          values: [0.1, 0.2, 0.3, 0.4, 0.5]
          counts: [3, 7, 23, 12, 6]

  # Document 2: the indexed 0 is expected back as 0.0.
  - do:
      get:
        index: histo_synthetic
        id: 2
  - match:
      _source:
        latency:
          values: [0.0, 0.1, 0.2, 0.3, 0.4, 0.5]
          counts: [3, 2, 5, 10, 1, 8]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue