From cf5ea0bb1f16f29b96aa9debde1d78475210acd2 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Thu, 2 Feb 2023 09:25:00 +0100 Subject: [PATCH] [ML] rename frequent_items to frequent_item_sets and make it GA (#93421) rename frequent_items to frequent_item_sets and remove the experimental badge --- docs/changelog/93421.yaml | 10 ++ docs/reference/aggregations/bucket.asciidoc | 2 +- ...> frequent-item-sets-aggregation.asciidoc} | 166 +++++++++--------- .../xpack/ml/MachineLearning.java | 2 +- .../frequentitemsets/EclatMapReducer.java | 1 + .../FrequentItemSetsAggregationBuilder.java | 5 +- ...equentItemSetsAggregationBuilderTests.java | 4 +- ...ems_agg.yml => frequent_item_sets_agg.yml} | 92 +++++++--- 8 files changed, 165 insertions(+), 117 deletions(-) create mode 100644 docs/changelog/93421.yaml rename docs/reference/aggregations/bucket/{frequent-items-aggregation.asciidoc => frequent-item-sets-aggregation.asciidoc} (86%) rename x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/{frequent_items_agg.yml => frequent_item_sets_agg.yml} (88%) diff --git a/docs/changelog/93421.yaml b/docs/changelog/93421.yaml new file mode 100644 index 000000000000..20cb13522f52 --- /dev/null +++ b/docs/changelog/93421.yaml @@ -0,0 +1,10 @@ +pr: 93421 +summary: Make `frequent_item_sets` aggregation GA +area: Machine Learning +type: feature +issues: [] +highlight: + title: Make `frequent_item_sets` aggregation GA + body: The `frequent_item_sets` aggregation has been moved from + technical preview to general availability.
+ notable: true diff --git a/docs/reference/aggregations/bucket.asciidoc b/docs/reference/aggregations/bucket.asciidoc index a52eb15c9a15..4391d73ebd46 100644 --- a/docs/reference/aggregations/bucket.asciidoc +++ b/docs/reference/aggregations/bucket.asciidoc @@ -36,7 +36,7 @@ include::bucket/filter-aggregation.asciidoc[] include::bucket/filters-aggregation.asciidoc[] -include::bucket/frequent-items-aggregation.asciidoc[] +include::bucket/frequent-item-sets-aggregation.asciidoc[] include::bucket/geodistance-aggregation.asciidoc[] diff --git a/docs/reference/aggregations/bucket/frequent-items-aggregation.asciidoc b/docs/reference/aggregations/bucket/frequent-item-sets-aggregation.asciidoc similarity index 86% rename from docs/reference/aggregations/bucket/frequent-items-aggregation.asciidoc rename to docs/reference/aggregations/bucket/frequent-item-sets-aggregation.asciidoc index f52a31e98f1d..01dacd0d6ccd 100644 --- a/docs/reference/aggregations/bucket/frequent-items-aggregation.asciidoc +++ b/docs/reference/aggregations/bucket/frequent-item-sets-aggregation.asciidoc @@ -1,40 +1,38 @@ -[[search-aggregations-bucket-frequent-items-aggregation]] -=== Frequent items aggregation +[[search-aggregations-bucket-frequent-item-sets-aggregation]] +=== Frequent item sets aggregation ++++ -Frequent items +Frequent item sets ++++ -experimental::[] - -A bucket aggregation which finds frequent item sets. It is a form of association -rules mining that identifies items that often occur together. Items that are -frequently purchased together or log events that tend to co-occur are examples -of frequent item sets. Finding frequent item sets helps to discover +A bucket aggregation which finds frequent item sets. It is a form of association +rules mining that identifies items that often occur together. Items that are +frequently purchased together or log events that tend to co-occur are examples +of frequent item sets. 
Finding frequent item sets helps to discover relationships between different data points (items). -The aggregation reports closed item sets. A frequent item set is called closed -if no superset exists with the same ratio of documents (also known as its -<<frequent-items-minimum-support,support>>). For example, we have the two +The aggregation reports closed item sets. A frequent item set is called closed +if no superset exists with the same ratio of documents (also known as its +<<frequent-item-sets-minimum-support,support>>). For example, we have the two following candidates for a frequent item set, which have the same support value: 1. `apple, orange, banana` 2. `apple, orange, banana, tomato`. -Only the second item set (`apple, orange, banana, tomato`) is returned, and the -first set – which is a subset of the second one – is skipped. Both item sets +Only the second item set (`apple, orange, banana, tomato`) is returned, and the +first set – which is a subset of the second one – is skipped. Both item sets might be returned if their support values are different. -The runtime of the aggregation depends on the data and the provided parameters. -It might take a significant time for the aggregation to complete. For this -reason, it is recommended to use <<async-search,async search>> to run your +The runtime of the aggregation depends on the data and the provided parameters. +It might take a significant time for the aggregation to complete. For this +reason, it is recommended to use <<async-search,async search>> to run your requests asynchronously.
==== Syntax -A `frequent_items` aggregation looks like this in isolation: +A `frequent_item_sets` aggregation looks like this in isolation: [source,js] -------------------------------------------------- -"frequent_items": { +"frequent_item_sets": { "minimum_set_size": 3, "fields": [ {"field": "my_field_1"}, @@ -44,74 +42,74 @@ A `frequent_items` aggregation looks like this in isolation: -------------------------------------------------- // NOTCONSOLE -.`frequent_items` Parameters +.`frequent_item_sets` Parameters |=== |Parameter Name |Description |Required |Default Value |`fields` |(array) Fields to analyze. | Required | -|`minimum_set_size` | (integer) The <<frequent-items-minimum-set-size,minimum set size>> of one item set. | Optional | `1` -|`minimum_support` | (integer) The <<frequent-items-minimum-support,minimum support>> of one item set. | Optional | `0.1` +|`minimum_set_size` | (integer) The <<frequent-item-sets-minimum-set-size,minimum set size>> of one item set. | Optional | `1` +|`minimum_support` | (double) The <<frequent-item-sets-minimum-support,minimum support>> of one item set. | Optional | `0.01` |`size` | (integer) The number of top item sets to return. | Optional | `10` |`filter` | (object) Query that filters documents from the analysis | Optional | `match_all` |=== [discrete] -[[frequent-items-fields]] +[[frequent-item-sets-fields]] ==== Fields -Supported field types for the analyzed fields are keyword, numeric, ip, date, -and arrays of these types. You can also add runtime fields to your analyzed +Supported field types for the analyzed fields are keyword, numeric, ip, date, +and arrays of these types. You can also add runtime fields to your analyzed fields. -If the combined cardinality of the analyzed fields are high, the aggregation +If the combined cardinality of the analyzed fields is high, the aggregation might require a significant amount of system resources. -You can filter the values for each field by using the `include` and `exclude` -parameters. The parameters can be regular expression strings or arrays of -strings of exact terms. The filtered values are removed from the analysis and -therefore reduce the runtime.
If both `include` and `exclude` are defined, -`exclude` takes precedence; it means `include` is evaluated first and then +You can filter the values for each field by using the `include` and `exclude` +parameters. The parameters can be regular expression strings or arrays of +strings of exact terms. The filtered values are removed from the analysis and +therefore reduce the runtime. If both `include` and `exclude` are defined, +`exclude` takes precedence; it means `include` is evaluated first and then `exclude`. [discrete] -[[frequent-items-minimum-set-size]] +[[frequent-item-sets-minimum-set-size]] ==== Minimum set size -The minimum set size is the minimum number of items the set needs to contain. A -value of 1 returns the frequency of single items. Only item sets that contain at -least the number of `minimum_set_size` items are returned. For example, the item -set `orange, banana, apple` is returned only if the minimum set size is 3 or +The minimum set size is the minimum number of items the set needs to contain. A +value of 1 returns the frequency of single items. Only item sets that contain at +least the number of `minimum_set_size` items are returned. For example, the item +set `orange, banana, apple` is returned only if the minimum set size is 3 or lower. [discrete] -[[frequent-items-minimum-support]] +[[frequent-item-sets-minimum-support]] ==== Minimum support -The minimum support value is the ratio of documents that an item set must exist -in to be considered "frequent". In particular, it is a normalized value between -0 and 1. It is calculated by dividing the number of documents containing the +The minimum support value is the ratio of documents that an item set must exist +in to be considered "frequent". In particular, it is a normalized value between +0 and 1. It is calculated by dividing the number of documents containing the item set by the total number of documents. 
-For example, if a given item set is contained by five documents and the total -number of documents is 20, then the support of the item set is 5/20 = 0.25. -Therefore, this set is returned only if the minimum support is 0.25 or lower. -As a higher minimum support prunes more items, the calculation is less resource -intensive. The `minimum_support` parameter has an effect on the required memory +For example, if a given item set is contained by five documents and the total +number of documents is 20, then the support of the item set is 5/20 = 0.25. +Therefore, this set is returned only if the minimum support is 0.25 or lower. +As a higher minimum support prunes more items, the calculation is less resource +intensive. The `minimum_support` parameter has an effect on the required memory and the runtime of the aggregation. [discrete] -[[frequent-items-size]] +[[frequent-item-sets-size]] ==== Size -This parameter defines the maximum number of item sets to return. The result -contains top-k item sets; the item sets with the highest support values. This -parameter has a significant effect on the required memory and the runtime of the +This parameter defines the maximum number of item sets to return. The result +contains top-k item sets; the item sets with the highest support values. This +parameter has a significant effect on the required memory and the runtime of the aggregation. [discrete] -[[frequent-items-filter]] +[[frequent-item-sets-filter]] ==== Filter A query to filter documents to use as part of the analysis. Documents that @@ -123,7 +121,7 @@ Use a top-level query to filter the data set. [discrete] -[[frequent-items-example]] +[[frequent-item-sets-example]] ==== Examples In the following examples, we use the e-commerce {kib} sample data set. @@ -132,14 +130,14 @@ In the following examples, we use the e-commerce {kib} sample data set. 
[discrete] ==== Aggregation with two analyzed fields and an `exclude` parameter -In the first example, the goal is to find out based on transaction data (1.) -from what product categories the customers purchase products frequently together -and (2.) from which cities they make those purchases. We want to exclude results -where location information is not available (where the city name is `other`). -Finally, we are interested in sets with three or more items, and want to see the +In the first example, the goal is to find out based on transaction data (1.) +from what product categories the customers purchase products frequently together +and (2.) from which cities they make those purchases. We want to exclude results +where location information is not available (where the city name is `other`). +Finally, we are interested in sets with three or more items, and want to see the first three frequent item sets with the highest support. -Note that we use the <<async-search,async search>> endpoint in this first +Note that we use the <<async-search,async search>> endpoint in this first example. [source,console] @@ -149,7 +147,7 @@ POST /kibana_sample_data_ecommerce/_async_search "size":0, "aggs":{ "my_agg":{ - "frequent_items":{ + "frequent_item_sets":{ "minimum_set_size":3, "fields":[ { @@ -168,7 +166,7 @@ POST /kibana_sample_data_ecommerce/_async_search ------------------------------------------------- // TEST[skip:setup kibana sample data] -The response of the API call above contains an identifier (`id`) of the async +The response of the API call above contains an identifier (`id`) of the async search request. You can use the identifier to retrieve the search results: [source,console] @@ -225,27 +223,27 @@ The API returns a response similar to the following one: "support" : 0.026310160427807486 } ], - (...) + (...) } } ------------------------------------------------- // TEST[skip:setup kibana sample data] <1> The array of returned item sets. -<2> The `key` object contains one item set.
In this case, it consists of two +<2> The `key` object contains one item set. In this case, it consists of two values of the `category.keyword` field and one value of the `geoip.city_name`. -<3> The number of documents that contain the item set. -<4> The support value of the item set. It is calculated by dividing the number -of documents containing the item set by the total number of documents. +<3> The number of documents that contain the item set. +<4> The support value of the item set. It is calculated by dividing the number +of documents containing the item set by the total number of documents. -The response shows that the categories customers purchase from most frequently -together are `Women's Clothing` and `Women's Shoes` and customers from New York -tend to buy items from these categories frequently together. In other words, -customers who buy products labelled `Women's Clothing` more likely buy products -also from the `Women's Shoes` category and customers from New York most likely -buy products from these categories together. The item set with the second -highest support is `Women's Clothing` and `Women's Accessories` with customers -mostly from New York. Finally, the item set with the third highest support is +The response shows that the categories customers purchase from most frequently +together are `Women's Clothing` and `Women's Shoes` and customers from New York +tend to buy items from these categories frequently together. In other words, +customers who buy products labelled `Women's Clothing` more likely buy products +also from the `Women's Shoes` category and customers from New York most likely +buy products from these categories together. The item set with the second +highest support is `Women's Clothing` and `Women's Accessories` with customers +mostly from New York. Finally, the item set with the third highest support is `Men's Clothing` and `Men's Shoes` with customers mostly from Cairo. 
@@ -262,7 +260,7 @@ POST /kibana_sample_data_ecommerce/_async_search "size": 0, "aggs": { "my_agg": { - "frequent_items": { + "frequent_item_sets": { "minimum_set_size": 3, "fields": [ { "field": "category.keyword" }, @@ -282,20 +280,20 @@ POST /kibana_sample_data_ecommerce/_async_search // TEST[skip:setup kibana sample data] The result will only show item sets that created from documents matching the -filter, namely purchases in Europe. Using `filter`, the calculated `support` -still takes all purchases into acount. That's different than specifying a query -at the top-level, in which case `support` gets calculated only from purchases in +filter, namely purchases in Europe. Using `filter`, the calculated `support` +still takes all purchases into account. That's different than specifying a query +at the top-level, in which case `support` gets calculated only from purchases in Europe. [discrete] ==== Analyzing numeric values by using a runtime field -The frequent items aggregation enables you to bucket numeric values by using -<<runtime,runtime fields>>. The next example demonstrates how to use a script to -add a runtime field to your documents called `price_range`, which is -calculated from the taxful total price of the individual transactions. The -runtime field then can be used in the frequent items aggregation as a field to +The frequent item sets aggregation enables you to bucket numeric values by using +<<runtime,runtime fields>>. The next example demonstrates how to use a script to +add a runtime field to your documents called `price_range`, which is +calculated from the taxful total price of the individual transactions. The +runtime field then can be used in the frequent item sets aggregation as a field to analyze.
@@ -318,7 +316,7 @@ GET kibana_sample_data_ecommerce/_search "size": 0, "aggs": { "my_agg": { - "frequent_items": { + "frequent_item_sets": { "minimum_set_size": 4, "fields": [ { @@ -402,6 +400,6 @@ The API returns a response similar to the following one: ------------------------------------------------- // TEST[skip:setup kibana sample data] -The response shows the categories that customers purchase from most frequently -together, the location of the customers who tend to buy items from these +The response shows the categories that customers purchase from most frequently +together, the location of the customers who tend to buy items from these categories, and the most frequent price ranges of these purchases. diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java index 80d97885bc4f..821dab59c35a 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java @@ -1573,7 +1573,7 @@ public class MachineLearning extends Plugin ).addResultReader(InternalCategorizationAggregation::new) .setAggregatorRegistrar(s -> s.registerUsage(CategorizeTextAggregationBuilder.NAME)), new AggregationSpec( - FrequentItemSetsAggregationBuilder.NAME, + new ParseField(FrequentItemSetsAggregationBuilder.NAME, FrequentItemSetsAggregationBuilder.DEPRECATED_NAME), FrequentItemSetsAggregationBuilder::new, checkAggLicense(FrequentItemSetsAggregationBuilder.PARSER, FREQUENT_ITEM_SETS_AGG_FEATURE) ).addResultReader(FrequentItemSetsAggregatorFactory.getResultReader()) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/frequentitemsets/EclatMapReducer.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/frequentitemsets/EclatMapReducer.java index 171a8f4d79d3..9a8a47991af4 100644 --- 
a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/frequentitemsets/EclatMapReducer.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/frequentitemsets/EclatMapReducer.java @@ -120,6 +120,7 @@ public final class EclatMapReducer extends AbstractItemSetMapReducer< private static final Logger logger = LogManager.getLogger(EclatMapReducer.class); private static final int VERSION = 1; + // named writable for this implementation public static final String NAME = "frequent_items-eclat-" + VERSION; // cache for marking transactions visited, memory usage: ((BITSET_CACHE_TRAVERSAL_DEPTH -2) * BITSET_CACHE_NUMBER_OF_TRANSACTIONS) / 8 diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/frequentitemsets/FrequentItemSetsAggregationBuilder.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/frequentitemsets/FrequentItemSetsAggregationBuilder.java index 3f3db67afe4d..e4263da428bc 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/frequentitemsets/FrequentItemSetsAggregationBuilder.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/frequentitemsets/FrequentItemSetsAggregationBuilder.java @@ -37,7 +37,10 @@ import static org.elasticsearch.common.Strings.format; public final class FrequentItemSetsAggregationBuilder extends AbstractAggregationBuilder { - public static final String NAME = "frequent_items"; + public static final String NAME = "frequent_item_sets"; + + // name used between 8.4 - 8.6, kept for backwards compatibility until 9.0 + public static final String DEPRECATED_NAME = "frequent_items"; public static final double DEFAULT_MINIMUM_SUPPORT = 0.01; public static final int DEFAULT_MINIMUM_SET_SIZE = 1; diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/aggs/frequentitemsets/FrequentItemSetsAggregationBuilderTests.java 
b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/aggs/frequentitemsets/FrequentItemSetsAggregationBuilderTests.java index 4890d0893a64..9534ace3d3b9 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/aggs/frequentitemsets/FrequentItemSetsAggregationBuilderTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/aggs/frequentitemsets/FrequentItemSetsAggregationBuilderTests.java @@ -184,7 +184,7 @@ public class FrequentItemSetsAggregationBuilderTests extends AbstractXContentSer randomFrom(EXECUTION_HINT_ALLOWED_MODES) ).subAggregation(AggregationBuilders.avg("fieldA"))); - assertEquals("Aggregator [fi] of type [frequent_items] cannot accept sub-aggregations", e.getMessage()); + assertEquals("Aggregator [fi] of type [frequent_item_sets] cannot accept sub-aggregations", e.getMessage()); e = expectThrows( IllegalArgumentException.class, @@ -202,7 +202,7 @@ public class FrequentItemSetsAggregationBuilderTests extends AbstractXContentSer ).subAggregations(new AggregatorFactories.Builder().addAggregator(AggregationBuilders.avg("fieldA"))) ); - assertEquals("Aggregator [fi] of type [frequent_items] cannot accept sub-aggregations", e.getMessage()); + assertEquals("Aggregator [fi] of type [frequent_item_sets] cannot accept sub-aggregations", e.getMessage()); e = expectThrows( IllegalArgumentException.class, diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/frequent_items_agg.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/frequent_item_sets_agg.yml similarity index 88% rename from x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/frequent_items_agg.yml rename to x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/frequent_item_sets_agg.yml index 39661d4917d0..f5244d271abe 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/frequent_items_agg.yml +++ 
b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/frequent_item_sets_agg.yml @@ -93,7 +93,7 @@ setup: --- -"Test frequent items array fields": +"Test frequent item sets array fields": - do: search: @@ -103,7 +103,7 @@ setup: "size": 0, "aggs": { "fi": { - "frequent_items": { + "frequent_item_sets": { "minimum_set_size": 3, "minimum_support": 0.3, "fields": [ @@ -123,7 +123,7 @@ setup: - match: { aggregations.fi.buckets.1.key.error_message: ["engine overheated"] } --- -"Test frequent items date format": +"Test frequent item sets date format": - do: search: @@ -141,7 +141,7 @@ setup: "size": 0, "aggs": { "fi": { - "frequent_items": { + "frequent_item_sets": { "minimum_set_size": 3, "minimum_support": 0.3, "fields": [ @@ -159,7 +159,7 @@ setup: --- -"Test frequent items date format 2": +"Test frequent item sets date format 2": - do: search: @@ -177,7 +177,7 @@ setup: "size": 0, "aggs": { "fi": { - "frequent_items": { + "frequent_item_sets": { "minimum_set_size": 2, "minimum_support": 0.3, "fields": [ @@ -195,7 +195,7 @@ setup: - match: { aggregations.fi.buckets.0.key.error_message: ["engine overheated"] } --- -"Test frequent items array fields profile": +"Test frequent item sets array fields profile": - do: search: @@ -206,7 +206,7 @@ setup: "size": 0, "aggs": { "fi": { - "frequent_items": { + "frequent_item_sets": { "minimum_set_size": 3, "minimum_support": 0.2, "fields": [ @@ -229,7 +229,7 @@ setup: - match: { aggregations.fi.profile.unique_items_after_prune: 11 } --- -"Test frequent items flattened fields": +"Test frequent item sets flattened fields": - do: search: @@ -239,7 +239,7 @@ setup: "size": 0, "aggs": { "fi": { - "frequent_items": { + "frequent_item_sets": { "minimum_set_size": 3, "minimum_support": 0.3, "fields": [ @@ -259,7 +259,7 @@ setup: - match: { aggregations.fi.buckets.1.key.data\.error_message: ["engine overheated"] } --- -"Test frequent items as subagg": +"Test frequent item sets as subagg": - do: search: @@ -276,7 +276,7 @@ setup: 
}, "aggs": { "fi": { - "frequent_items": { + "frequent_item_sets": { "minimum_set_size": 3, "minimum_support": 0.3, "fields": [ @@ -298,7 +298,7 @@ setup: - match: { aggregations.filter_error.fi.buckets.0.key.error_message: ["compressor low pressure"] } --- -"Test frequent items as multi-bucket subagg": +"Test frequent item sets as multi-bucket subagg": - do: search: @@ -314,7 +314,7 @@ setup: }, "aggs": { "fi": { - "frequent_items": { + "frequent_item_sets": { "minimum_set_size": 3, "minimum_support": 0.3, "fields": [ @@ -335,7 +335,7 @@ setup: - match: { aggregations.weekly.buckets.2.fi.buckets.0.doc_count: 1 } --- -"Test frequent items filter": +"Test frequent item sets filter": - do: search: @@ -345,7 +345,7 @@ setup: "size": 0, "aggs": { "fi": { - "frequent_items": { + "frequent_item_sets": { "minimum_set_size": 3, "minimum_support": 0.3, "fields": [ @@ -369,7 +369,7 @@ setup: - match: { aggregations.fi.buckets.0.key.error_message: ["compressor low pressure"] } --- -"Test frequent items exclude": +"Test frequent item sets exclude": - do: search: @@ -379,7 +379,7 @@ setup: "size": 0, "aggs": { "fi": { - "frequent_items": { + "frequent_item_sets": { "minimum_set_size": 3, "minimum_support": 0.3, "fields": [ @@ -401,7 +401,7 @@ setup: - match: { aggregations.fi.buckets.1.support: 0.3 } --- -"Test frequent items include": +"Test frequent item sets include": - do: search: @@ -411,7 +411,7 @@ setup: "size": 0, "aggs": { "fi": { - "frequent_items": { + "frequent_item_sets": { "minimum_set_size": 3, "minimum_support": 0.3, "fields": [ @@ -431,9 +431,9 @@ setup: - match: { aggregations.fi.buckets.0.key.error_message: ["engine overheated"] } --- -"Test frequent items unsupported types": +"Test frequent item sets unsupported types": - do: - catch: /Field \[geo_point\] of type \[geo_point\] is not supported for aggregation \[frequent_items\]/ + catch: /Field \[geo_point\] of type \[geo_point\] is not supported for aggregation \[frequent_item_sets\]/ search: index: store 
body: > @@ -441,7 +441,7 @@ setup: "size": 0, "aggs": { "fi": { - "frequent_items": { + "frequent_item_sets": { "minimum_set_size": 3, "minimum_support": 0.3, "fields": [ @@ -454,7 +454,7 @@ setup: } } - do: - catch: /Field \[histogram\] of type \[histogram\] is not supported for aggregation \[frequent_items\]/ + catch: /Field \[histogram\] of type \[histogram\] is not supported for aggregation \[frequent_item_sets\]/ search: index: store body: > @@ -462,7 +462,7 @@ setup: "size": 0, "aggs": { "fi": { - "frequent_items": { + "frequent_item_sets": { "minimum_set_size": 3, "minimum_support": 0.3, "fields": [ @@ -476,9 +476,9 @@ setup: } --- -"Test frequent items unsupported subaggs": +"Test frequent item sets unsupported subaggs": - do: - catch: /Aggregator \[fi\] of type \[frequent_items\] cannot accept sub-aggregations/ + catch: /Aggregator \[fi\] of type \[frequent_item_sets\] cannot accept sub-aggregations/ search: index: store body: > @@ -486,7 +486,7 @@ setup: "size": 0, "aggs": { "fi": { - "frequent_items": { + "frequent_item_sets": { "minimum_set_size": 3, "minimum_support": 0.3, "fields": [ @@ -504,3 +504,39 @@ setup: } } } + +--- +"Test deprecated frequent items": + - skip: + features: + - "allowed_warnings" + + - do: + allowed_warnings: + - 'Deprecated field [frequent_items] used, expected [frequent_item_sets] instead' + + search: + index: store + body: > + { + "size": 0, + "aggs": { + "fi": { + "frequent_items": { + "minimum_set_size": 3, + "minimum_support": 0.3, + "fields": [ + {"field": "features"}, + {"field": "error_message"} + ] + } + } + } + } + - length: { aggregations.fi.buckets: 4 } + - match: { aggregations.fi.buckets.0.doc_count: 5 } + - match: { aggregations.fi.buckets.0.support: 0.5 } + - match: { aggregations.fi.buckets.0.key.error_message: ["compressor low pressure"] } + - match: { aggregations.fi.buckets.1.doc_count: 4 } + - match: { aggregations.fi.buckets.1.support: 0.4 } + - match: { aggregations.fi.buckets.1.key.error_message: 
["engine overheated"] }