mirror of
https://github.com/elastic/elasticsearch.git
synced 2025-04-25 15:47:23 -04:00
Co-authored-by: James Rodewig <40268737+jrodewig@users.noreply.github.com> Co-authored-by: xiaozhiliaoo(小知了) <772654204@qq.com>
425 lines
8.8 KiB
Text
425 lines
8.8 KiB
Text
[[search-aggregations-bucket-range-aggregation]]
|
|
=== Range aggregation
|
|
++++
|
|
<titleabbrev>Range</titleabbrev>
|
|
++++
|
|
|
|
A multi-bucket value source based aggregation that enables the user to define a set of ranges - each representing a bucket. During the aggregation process, the values extracted from each document are checked against each bucket range, and the matching documents are placed in the corresponding bucket.
|
|
Note that this aggregation includes the `from` value and excludes the `to` value for each range.
|
|
|
|
Example:
|
|
|
|
[source,console,id=range-aggregation-example]
|
|
----
|
|
GET sales/_search
|
|
{
|
|
"aggs": {
|
|
"price_ranges": {
|
|
"range": {
|
|
"field": "price",
|
|
"ranges": [
|
|
{ "to": 100.0 },
|
|
{ "from": 100.0, "to": 200.0 },
|
|
{ "from": 200.0 }
|
|
]
|
|
}
|
|
}
|
|
}
|
|
}
|
|
----
|
|
// TEST[setup:sales]
|
|
// TEST[s/_search/_search\?filter_path=aggregations/]
|
|
|
|
Response:
|
|
|
|
[source,console-result]
|
|
----
|
|
{
|
|
...
|
|
"aggregations": {
|
|
"price_ranges": {
|
|
"buckets": [
|
|
{
|
|
"key": "*-100.0",
|
|
"to": 100.0,
|
|
"doc_count": 2
|
|
},
|
|
{
|
|
"key": "100.0-200.0",
|
|
"from": 100.0,
|
|
"to": 200.0,
|
|
"doc_count": 2
|
|
},
|
|
{
|
|
"key": "200.0-*",
|
|
"from": 200.0,
|
|
"doc_count": 3
|
|
}
|
|
]
|
|
}
|
|
}
|
|
}
|
|
----
|
|
// TESTRESPONSE[s/\.\.\.//]
|
|
|
|
==== Keyed Response
|
|
|
|
Setting the `keyed` flag to `true` will associate a unique string key with each bucket and return the ranges as a hash rather than an array:
|
|
|
|
[source,console,id=range-aggregation-keyed-example]
|
|
----
|
|
GET sales/_search
|
|
{
|
|
"aggs": {
|
|
"price_ranges": {
|
|
"range": {
|
|
"field": "price",
|
|
"keyed": true,
|
|
"ranges": [
|
|
{ "to": 100 },
|
|
{ "from": 100, "to": 200 },
|
|
{ "from": 200 }
|
|
]
|
|
}
|
|
}
|
|
}
|
|
}
|
|
----
|
|
// TEST[setup:sales]
|
|
// TEST[s/_search/_search\?filter_path=aggregations/]
|
|
|
|
Response:
|
|
|
|
[source,console-result]
|
|
----
|
|
{
|
|
...
|
|
"aggregations": {
|
|
"price_ranges": {
|
|
"buckets": {
|
|
"*-100.0": {
|
|
"to": 100.0,
|
|
"doc_count": 2
|
|
},
|
|
"100.0-200.0": {
|
|
"from": 100.0,
|
|
"to": 200.0,
|
|
"doc_count": 2
|
|
},
|
|
"200.0-*": {
|
|
"from": 200.0,
|
|
"doc_count": 3
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
----
|
|
// TESTRESPONSE[s/\.\.\.//]
|
|
|
|
It is also possible to customize the key for each range:
|
|
|
|
[source,console,id=range-aggregation-custom-keys-example]
|
|
----
|
|
GET sales/_search
|
|
{
|
|
"aggs": {
|
|
"price_ranges": {
|
|
"range": {
|
|
"field": "price",
|
|
"keyed": true,
|
|
"ranges": [
|
|
{ "key": "cheap", "to": 100 },
|
|
{ "key": "average", "from": 100, "to": 200 },
|
|
{ "key": "expensive", "from": 200 }
|
|
]
|
|
}
|
|
}
|
|
}
|
|
}
|
|
----
|
|
// TEST[setup:sales]
|
|
// TEST[s/_search/_search\?filter_path=aggregations/]
|
|
|
|
Response:
|
|
|
|
[source,console-result]
|
|
----
|
|
{
|
|
...
|
|
"aggregations": {
|
|
"price_ranges": {
|
|
"buckets": {
|
|
"cheap": {
|
|
"to": 100.0,
|
|
"doc_count": 2
|
|
},
|
|
"average": {
|
|
"from": 100.0,
|
|
"to": 200.0,
|
|
"doc_count": 2
|
|
},
|
|
"expensive": {
|
|
"from": 200.0,
|
|
"doc_count": 3
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
----
|
|
// TESTRESPONSE[s/\.\.\.//]
|
|
|
|
==== Script
|
|
|
|
If the data in your documents doesn't exactly match what you'd like to aggregate,
|
|
use a <<runtime,runtime field>>. For example, if you need to
|
|
apply a particular currency conversion rate:
|
|
|
|
[source,console,id=range-aggregation-runtime-field-example]
|
|
----
|
|
GET sales/_search
|
|
{
|
|
"runtime_mappings": {
|
|
"price.euros": {
|
|
"type": "double",
|
|
"script": {
|
|
"source": """
|
|
emit(doc['price'].value * params.conversion_rate)
|
|
""",
|
|
"params": {
|
|
"conversion_rate": 0.835526591
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"aggs": {
|
|
"price_ranges": {
|
|
"range": {
|
|
"field": "price.euros",
|
|
"ranges": [
|
|
{ "to": 100 },
|
|
{ "from": 100, "to": 200 },
|
|
{ "from": 200 }
|
|
]
|
|
}
|
|
}
|
|
}
|
|
}
|
|
----
|
|
// TEST[setup:sales]
|
|
// TEST[s/_search/_search\?filter_path=aggregations/]
|
|
|
|
//////////////////////////
|
|
|
|
[source,console-result]
|
|
----
|
|
{
|
|
"aggregations": {
|
|
"price_ranges": {
|
|
"buckets": [
|
|
{
|
|
"key": "*-100.0",
|
|
"to": 100.0,
|
|
"doc_count": 2
|
|
},
|
|
{
|
|
"key": "100.0-200.0",
|
|
"from": 100.0,
|
|
"to": 200.0,
|
|
"doc_count": 5
|
|
},
|
|
{
|
|
"key": "200.0-*",
|
|
"from": 200.0,
|
|
"doc_count": 0
|
|
}
|
|
]
|
|
}
|
|
}
|
|
}
|
|
----
|
|
|
|
//////////////////////////
|
|
|
|
==== Sub Aggregations
|
|
|
|
The following example not only buckets the documents into the different ranges but also computes statistics over the prices in each price range:
|
|
|
|
[source,console,id=range-aggregation-sub-aggregation-example]
|
|
----
|
|
GET sales/_search
|
|
{
|
|
"aggs": {
|
|
"price_ranges": {
|
|
"range": {
|
|
"field": "price",
|
|
"ranges": [
|
|
{ "to": 100 },
|
|
{ "from": 100, "to": 200 },
|
|
{ "from": 200 }
|
|
]
|
|
},
|
|
"aggs": {
|
|
"price_stats": {
|
|
"stats": { "field": "price" }
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
----
|
|
// TEST[setup:sales]
|
|
// TEST[s/_search/_search\?filter_path=aggregations/]
|
|
|
|
Response:
|
|
|
|
[source,console-result]
|
|
----
|
|
{
|
|
...
|
|
"aggregations": {
|
|
"price_ranges": {
|
|
"buckets": [
|
|
{
|
|
"key": "*-100.0",
|
|
"to": 100.0,
|
|
"doc_count": 2,
|
|
"price_stats": {
|
|
"count": 2,
|
|
"min": 10.0,
|
|
"max": 50.0,
|
|
"avg": 30.0,
|
|
"sum": 60.0
|
|
}
|
|
},
|
|
{
|
|
"key": "100.0-200.0",
|
|
"from": 100.0,
|
|
"to": 200.0,
|
|
"doc_count": 2,
|
|
"price_stats": {
|
|
"count": 2,
|
|
"min": 150.0,
|
|
"max": 175.0,
|
|
"avg": 162.5,
|
|
"sum": 325.0
|
|
}
|
|
},
|
|
{
|
|
"key": "200.0-*",
|
|
"from": 200.0,
|
|
"doc_count": 3,
|
|
"price_stats": {
|
|
"count": 3,
|
|
"min": 200.0,
|
|
"max": 200.0,
|
|
"avg": 200.0,
|
|
"sum": 600.0
|
|
}
|
|
}
|
|
]
|
|
}
|
|
}
|
|
}
|
|
----
|
|
// TESTRESPONSE[s/\.\.\.//]
|
|
|
|
[[search-aggregations-bucket-range-aggregation-histogram-fields]]
|
|
==== Histogram fields
|
|
|
|
Running a range aggregation over histogram fields computes the total number of counts for each configured range.
|
|
|
|
This is done without interpolating between the histogram field values. Consequently, it is possible to have a range
|
|
that is "in-between" two histogram values. The resulting range bucket would have a zero doc count.
|
|
|
|
Here is an example, executing a range aggregation against the following index that stores pre-aggregated histograms
|
|
with latency metrics (in milliseconds) for different networks:
|
|
|
|
[source,console]
|
|
----
|
|
PUT metrics_index/_doc/1
|
|
{
|
|
"network.name" : "net-1",
|
|
"latency_histo" : {
|
|
"values" : [1, 3, 8, 12, 15],
|
|
"counts" : [3, 7, 23, 12, 6]
|
|
}
|
|
}
|
|
|
|
PUT metrics_index/_doc/2
|
|
{
|
|
"network.name" : "net-2",
|
|
"latency_histo" : {
|
|
"values" : [1, 6, 8, 12, 14],
|
|
"counts" : [8, 17, 8, 7, 6]
|
|
}
|
|
}
|
|
|
|
GET metrics_index/_search?size=0&filter_path=aggregations
|
|
{
|
|
"aggs": {
|
|
"latency_ranges": {
|
|
"range": {
|
|
"field": "latency_histo",
|
|
"ranges": [
|
|
{"to": 2},
|
|
{"from": 2, "to": 3},
|
|
{"from": 3, "to": 10},
|
|
{"from": 10}
|
|
]
|
|
}
|
|
}
|
|
}
|
|
}
|
|
----
|
|
|
|
The `range` aggregation will sum, for each range, the `counts` of the histogram `values` that fall within it and
|
|
return the following output:
|
|
|
|
[source,console-result]
|
|
----
|
|
{
|
|
"aggregations": {
|
|
"latency_ranges": {
|
|
"buckets": [
|
|
{
|
|
"key": "*-2.0",
|
|
"to": 2,
|
|
"doc_count": 11
|
|
},
|
|
{
|
|
"key": "2.0-3.0",
|
|
"from": 2,
|
|
"to": 3,
|
|
"doc_count": 0
|
|
},
|
|
{
|
|
"key": "3.0-10.0",
|
|
"from": 3,
|
|
"to": 10,
|
|
"doc_count": 55
|
|
},
|
|
{
|
|
"key": "10.0-*",
|
|
"from": 10,
|
|
"doc_count": 31
|
|
}
|
|
]
|
|
}
|
|
}
|
|
}
|
|
----
|
|
// TESTRESPONSE[skip:test not setup]
|
|
|
|
[IMPORTANT]
|
|
========
|
|
Range aggregation is a bucket aggregation, which partitions documents into buckets rather than calculating metrics over fields like
|
|
metrics aggregations do. Each bucket represents a collection of documents which sub-aggregations can run on.
|
|
On the other hand, a histogram field is a pre-aggregated field representing multiple values inside a single field:
|
|
buckets of numerical data and a count of items/documents for each bucket. This mismatch between the range aggregation's expected input
|
|
(expecting raw documents) and the histogram field (that provides summary information) limits the outcome of the aggregation
|
|
to only the doc counts for each bucket.
|
|
|
|
**Consequently, when executing a range aggregation over a histogram field, no sub-aggregations are allowed.**
|
|
========
|