diff --git a/docs/changelog/127975.yaml b/docs/changelog/127975.yaml new file mode 100644 index 000000000000..35a76a2474a3 --- /dev/null +++ b/docs/changelog/127975.yaml @@ -0,0 +1,5 @@ +pr: 127975 +summary: Fix a bug in `significant_terms` +area: Aggregations +type: bug +issues: [] diff --git a/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/sig_terms.yml b/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/sig_terms.yml index e8b0419589ed..16f14c960165 100644 --- a/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/sig_terms.yml +++ b/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/sig_terms.yml @@ -73,7 +73,7 @@ - match: {aggregations.class.buckets.1.sig_terms.buckets.0.key: "good"} --- -"Test background filter count ": +"Test background filter count": - requires: cluster_features: ["gte_v7.15.0"] reason: bugfix introduced in 7.15.0 @@ -153,6 +153,257 @@ index: goodbad* body: {"aggs": {"sig_terms": {"significant_terms": {"field": "text", "background_filter": {"bool": {"filter": [{"term": {"class": "good" }}]}}}}}} - match: { aggregations.sig_terms.bg_count: 2 } + +--- +"Test background filter count as sub - global ords": + - requires: + capabilities: + - method: POST + path: /_search + capabilities: [ significant_terms_background_filter_as_sub ] + test_runner_features: capabilities + reason: "bug fix" + + - do: + indices.create: + index: goodbad + body: + settings: + number_of_shards: 1 + mappings: + properties: + text: + type: keyword + class: + type: keyword + - do: + indices.create: + index: goodbad-2 + body: + settings: + number_of_shards: 1 + mappings: + properties: + text: + type: keyword + class: + type: keyword + + - do: + index: + index: goodbad-2 + id: "1" + body: { group: 1, class: "bad" } + - do: + index: + index: goodbad-2 + id: "2" + body: { group: 1, class: "bad" } + + - do: + index: + index: goodbad + id: "1" + body: { group: 
1, text: "good", class: "good" } + - do: + index: + index: goodbad + id: "2" + body: { group: 1, text: "good", class: "good" } + - do: + index: + index: goodbad + id: "3" + body: { group: 1, text: "bad", class: "bad" } + - do: + index: + index: goodbad + id: "4" + body: { group: 2, text: "bad", class: "bad" } + + - do: + indices.refresh: + index: [goodbad, goodbad-2] + + - do: + search: + rest_total_hits_as_int: true + index: goodbad* + - match: {hits.total: 6} + + - do: + search: + index: goodbad* + body: + aggs: + group: + range: + field: group + ranges: + # Having many ranges helps catch an issue building no hits buckets + - to: 1 + - from: 1 + to: 2 + - from: 2 + to: 3 + - from: 3 + to: 4 + - from: 4 + to: 5 + - from: 5 + to: 6 + aggs: + sig_terms: + significant_terms: + execution_hint: global_ordinals + field: text + background_filter: + bool: + filter: [{term: {class: good }}] + - match: { aggregations.group.buckets.0.key: "*-1.0" } + - match: { aggregations.group.buckets.0.sig_terms.doc_count: 0 } + - match: { aggregations.group.buckets.0.sig_terms.bg_count: 2 } + - match: { aggregations.group.buckets.1.key: "1.0-2.0" } + - match: { aggregations.group.buckets.1.sig_terms.doc_count: 5 } + - match: { aggregations.group.buckets.1.sig_terms.bg_count: 2 } + - match: { aggregations.group.buckets.2.key: "2.0-3.0" } + - match: { aggregations.group.buckets.2.sig_terms.doc_count: 1 } + - match: { aggregations.group.buckets.2.sig_terms.bg_count: 2 } + - match: { aggregations.group.buckets.3.key: "3.0-4.0" } + - match: { aggregations.group.buckets.3.sig_terms.doc_count: 0 } + - match: { aggregations.group.buckets.3.sig_terms.bg_count: 2 } + - match: { aggregations.group.buckets.4.key: "4.0-5.0" } + - match: { aggregations.group.buckets.4.sig_terms.doc_count: 0 } + - match: { aggregations.group.buckets.4.sig_terms.bg_count: 2 } + - match: { aggregations.group.buckets.5.key: "5.0-6.0" } + - match: { aggregations.group.buckets.5.sig_terms.doc_count: 0 } + - match: { 
aggregations.group.buckets.5.sig_terms.bg_count: 2 } + +--- +"Test background filter count as sub - map": + - requires: + capabilities: + - method: POST + path: /_search + capabilities: [ significant_terms_background_filter_as_sub ] + test_runner_features: capabilities + reason: "bug fix" + + - do: + indices.create: + index: goodbad + body: + settings: + number_of_shards: 1 + mappings: + properties: + text: + type: keyword + class: + type: keyword + - do: + indices.create: + index: goodbad-2 + body: + settings: + number_of_shards: 1 + mappings: + properties: + text: + type: keyword + class: + type: keyword + + - do: + index: + index: goodbad-2 + id: "1" + body: { group: 1, class: "bad" } + - do: + index: + index: goodbad-2 + id: "2" + body: { group: 1, class: "bad" } + + - do: + index: + index: goodbad + id: "1" + body: { group: 1, text: "good", class: "good" } + - do: + index: + index: goodbad + id: "2" + body: { group: 1, text: "good", class: "good" } + - do: + index: + index: goodbad + id: "3" + body: { group: 1, text: "bad", class: "bad" } + - do: + index: + index: goodbad + id: "4" + body: { group: 2, text: "bad", class: "bad" } + + - do: + indices.refresh: + index: [goodbad, goodbad-2] + + - do: + search: + rest_total_hits_as_int: true + index: goodbad* + - match: {hits.total: 6} + + - do: + search: + index: goodbad* + body: + aggs: + group: + range: + field: group + ranges: + # Having many ranges helps catch an issue building no hits buckets + - to: 1 + - from: 1 + to: 2 + - from: 2 + to: 3 + - from: 3 + to: 4 + - from: 4 + to: 5 + - from: 5 + to: 6 + aggs: + sig_terms: + significant_terms: + execution_hint: map + field: text + background_filter: + bool: + filter: [{term: {class: good }}] + - match: { aggregations.group.buckets.0.key: "*-1.0" } + - match: { aggregations.group.buckets.0.sig_terms.doc_count: 0 } + - match: { aggregations.group.buckets.0.sig_terms.bg_count: 2 } + - match: { aggregations.group.buckets.1.key: "1.0-2.0" } + - match: { 
aggregations.group.buckets.1.sig_terms.doc_count: 5 } + - match: { aggregations.group.buckets.1.sig_terms.bg_count: 2 } + - match: { aggregations.group.buckets.2.key: "2.0-3.0" } + - match: { aggregations.group.buckets.2.sig_terms.doc_count: 1 } + - match: { aggregations.group.buckets.2.sig_terms.bg_count: 2 } + - match: { aggregations.group.buckets.3.key: "3.0-4.0" } + - match: { aggregations.group.buckets.3.sig_terms.doc_count: 0 } + - match: { aggregations.group.buckets.3.sig_terms.bg_count: 2 } + - match: { aggregations.group.buckets.4.key: "4.0-5.0" } + - match: { aggregations.group.buckets.4.sig_terms.doc_count: 0 } + - match: { aggregations.group.buckets.4.sig_terms.bg_count: 2 } + - match: { aggregations.group.buckets.5.key: "5.0-6.0" } + - match: { aggregations.group.buckets.5.sig_terms.doc_count: 0 } + - match: { aggregations.group.buckets.5.sig_terms.bg_count: 2 } + --- "IP test": - do: diff --git a/server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java b/server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java index 07b5e372c00b..f8d3a3a65abc 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java +++ b/server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java @@ -48,6 +48,8 @@ public final class SearchCapabilities { private static final String INDEX_SELECTOR_SYNTAX = "index_expression_selectors"; + private static final String SIGNIFICANT_TERMS_BACKGROUND_FILTER_AS_SUB = "significant_terms_background_filter_as_sub"; + public static final Set CAPABILITIES; static { HashSet capabilities = new HashSet<>(); @@ -66,6 +68,7 @@ public final class SearchCapabilities { capabilities.add(KQL_QUERY_SUPPORTED); capabilities.add(HIGHLIGHT_MAX_ANALYZED_OFFSET_DEFAULT); capabilities.add(INDEX_SELECTOR_SYNTAX); + capabilities.add(SIGNIFICANT_TERMS_BACKGROUND_FILTER_AS_SUB); CAPABILITIES = Set.copyOf(capabilities); } } diff --git 
a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index 0e0b7f3f5f5e..22dc307db1ee 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -1087,7 +1087,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr @Override SignificantStringTerms buildNoValuesResult(long owningBucketOrdinal) { - return buildEmptySignificantTermsAggregation(subsetSizes.get(owningBucketOrdinal), supersetSize, significanceHeuristic); + return buildEmptySignificantTermsAggregation(subsetSize(owningBucketOrdinal), supersetSize, significanceHeuristic); } @Override diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/MapStringTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/MapStringTermsAggregator.java index 026912a583ef..5b6e2436ae1e 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/MapStringTermsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/MapStringTermsAggregator.java @@ -649,7 +649,7 @@ public final class MapStringTermsAggregator extends AbstractStringTermsAggregato @Override BucketUpdater bucketUpdater(long owningBucketOrd) { - long subsetSize = subsetSizes.get(owningBucketOrd); + long subsetSize = subsetSize(owningBucketOrd); return (spare, ordsEnum, docCount) -> { ordsEnum.readValue(spare.termBytes); spare.subsetDf = docCount; @@ -696,7 +696,7 @@ public final class MapStringTermsAggregator extends AbstractStringTermsAggregato bucketCountThresholds.getMinDocCount(), metadata(), format, - subsetSizes.get(owningBucketOrd), + 
subsetSize(owningBucketOrd), supersetSize, significanceHeuristic, Arrays.asList(topBuckets) @@ -712,5 +712,10 @@ public final class MapStringTermsAggregator extends AbstractStringTermsAggregato public void close() { Releasables.close(backgroundFrequencies, subsetSizes); } + + private long subsetSize(long owningBucketOrd) { + // subsetSizes has no entry for an owningBucketOrd >= its size; that means the bucket is empty, so its subset size is 0 + return owningBucketOrd < subsetSizes.size() ? subsetSizes.get(owningBucketOrd) : 0; + } } }