[Security Solutions] Fixes failed to query DNS data - too_many_buckets_exception (#97069)

## Summary

Fixes a bug where you can get a `too_many_buckets_exception` on the DNS tab of networks. I worked through the query with @angorayc, and together I think we figured out that it could be rewritten with aggregations so that it returns the same results without having to query a large number of terms, which is what causes the buckets exception.

Added an e2e test, along with one way of calling bsearch from the e2e test when the response reports that the search is still in progress, so that the large query test does not cause flakiness 🤞.

### Checklist

- [x] [Unit or functional tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html) were updated or added to match the most common scenarios
This commit is contained in:
Frank Hassanabad 2021-04-15 09:22:56 -06:00 committed by GitHub
parent 9b456ac0fe
commit ede8ed3f95
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 227 additions and 109 deletions

View file

@ -1935,17 +1935,15 @@ export const formattedDnsSearchStrategyResponse: MatrixHistogramStrategyResponse
ignoreUnavailable: true,
body: {
aggregations: {
dns_count: {
cardinality: {
field: 'dns.question.registered_domain',
},
},
dns_count: { cardinality: { field: 'dns.question.registered_domain' } },
dns_name_query_count: {
terms: {
field: 'dns.question.registered_domain',
size: 1000000,
order: { unique_domains: 'desc' },
size: 10,
},
aggs: {
unique_domains: { cardinality: { field: 'dns.question.name' } },
dns_question_name: {
date_histogram: {
field: '@timestamp',
@ -1954,47 +1952,13 @@ export const formattedDnsSearchStrategyResponse: MatrixHistogramStrategyResponse
extended_bounds: { min: 1599579675528, max: 1599666075529 },
},
},
bucket_sort: {
bucket_sort: {
sort: [
{
unique_domains: {
order: 'desc',
},
},
{
_key: {
order: 'asc',
},
},
],
from: 0,
size: 10,
},
},
unique_domains: {
cardinality: {
field: 'dns.question.name',
},
},
},
},
},
query: {
bool: {
filter: [
{
bool: {
must: [],
filter: [
{
match_all: {},
},
],
should: [],
must_not: [],
},
},
{ bool: { must: [], filter: [{ match_all: {} }], should: [], must_not: [] } },
{
range: {
'@timestamp': {
@ -2005,15 +1969,7 @@ export const formattedDnsSearchStrategyResponse: MatrixHistogramStrategyResponse
},
},
],
must_not: [
{
term: {
'dns.question.type': {
value: 'PTR',
},
},
},
],
must_not: [{ term: { 'dns.question.type': { value: 'PTR' } } }],
},
},
},

View file

@ -26,50 +26,6 @@ export const mockOptions = {
export const expectedDsl = {
allowNoIndices: true,
body: {
aggregations: {
dns_count: { cardinality: { field: 'dns.question.registered_domain' } },
dns_name_query_count: {
aggs: {
bucket_sort: {
bucket_sort: {
from: 0,
size: 10,
sort: [{ unique_domains: { order: 'desc' } }, { _key: { order: 'asc' } }],
},
},
dns_question_name: {
date_histogram: {
extended_bounds: { max: 1599666075529, min: 1599579675528 },
field: '@timestamp',
fixed_interval: '2700000ms',
min_doc_count: 0,
},
},
unique_domains: { cardinality: { field: 'dns.question.name' } },
},
terms: { field: 'dns.question.registered_domain', size: 1000000 },
},
},
query: {
bool: {
filter: [
{ bool: { filter: [{ match_all: {} }], must: [], must_not: [], should: [] } },
{
range: {
'@timestamp': {
format: 'strict_date_optional_time',
gte: '2020-09-08T15:41:15.528Z',
lte: '2020-09-09T15:41:15.529Z',
},
},
},
],
must_not: [{ term: { 'dns.question.type': { value: 'PTR' } } }],
},
},
},
ignoreUnavailable: true,
index: [
'apm-*-transaction*',
'auditbeat-*',
@ -79,6 +35,47 @@ export const expectedDsl = {
'packetbeat-*',
'winlogbeat-*',
],
ignoreUnavailable: true,
body: {
aggregations: {
dns_count: { cardinality: { field: 'dns.question.registered_domain' } },
dns_name_query_count: {
terms: {
field: 'dns.question.registered_domain',
order: { unique_domains: 'desc' },
size: 10,
},
aggs: {
unique_domains: { cardinality: { field: 'dns.question.name' } },
dns_question_name: {
date_histogram: {
field: '@timestamp',
fixed_interval: '2700000ms',
min_doc_count: 0,
extended_bounds: { min: 1599579675528, max: 1599666075529 },
},
},
},
},
},
query: {
bool: {
filter: [
{ bool: { must: [], filter: [{ match_all: {} }], should: [], must_not: [] } },
{
range: {
'@timestamp': {
gte: '2020-09-08T15:41:15.528Z',
lte: '2020-09-09T15:41:15.529Z',
format: 'strict_date_optional_time',
},
},
},
],
must_not: [{ term: { 'dns.question.type': { value: 'PTR' } } }],
},
},
},
size: 0,
track_total_hits: false,
};

View file

@ -9,14 +9,12 @@ import { isEmpty } from 'lodash/fp';
import moment from 'moment';
import { Direction, MatrixHistogramRequestOptions } from '../../../../../../common/search_strategy';
import { MatrixHistogramRequestOptions } from '../../../../../../common/search_strategy';
import {
calculateTimeSeriesInterval,
createQueryFilterClauses,
} from '../../../../../utils/build_query';
const HUGE_QUERY_SIZE = 1000000;
const getCountAgg = () => ({
dns_count: {
cardinality: {
@ -89,25 +87,18 @@ export const buildDnsHistogramQuery = ({
dns_name_query_count: {
terms: {
field: stackByField,
size: HUGE_QUERY_SIZE,
order: {
unique_domains: 'desc',
},
size: 10,
},
aggs: {
dns_question_name: getHistogramAggregation({ from, to }),
bucket_sort: {
bucket_sort: {
sort: [
{ unique_domains: { order: Direction.desc } },
{ _key: { order: Direction.asc } },
],
from: 0,
size: 10,
},
},
unique_domains: {
cardinality: {
field: 'dns.question.name',
},
},
dns_question_name: getHistogramAggregation({ from, to }),
},
},
},

View file

@ -12,6 +12,7 @@ export default function ({ loadTestFile }) {
loadTestFile(require.resolve('./host_details'));
loadTestFile(require.resolve('./kpi_network'));
loadTestFile(require.resolve('./kpi_hosts'));
loadTestFile(require.resolve('./matrix_dns_histogram'));
loadTestFile(require.resolve('./network_details'));
loadTestFile(require.resolve('./network_dns'));
loadTestFile(require.resolve('./network_top_n_flow'));

View file

@ -0,0 +1,106 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import expect from '@kbn/expect';
import request from 'superagent';
import {
MatrixHistogramQuery,
MatrixHistogramType,
} from '../../../../plugins/security_solution/common/search_strategy';
import { FtrProviderContext } from '../../ftr_provider_context';
/**
 * Function copied from test/api_integration/apis/search/bsearch.ts.
 *
 * Turns the text of a bsearch response into an array by trimming it, splitting
 * it on newlines, and running `JSON.parse` on every resulting line.
 *
 * @param resp Response whose `text` carries one JSON document per line
 * @returns The parsed documents, in the order their lines appear in `resp.text`
 */
export const parseBfetchResponse = (resp: request.Response): Array<Record<string, any>> => {
  const jsonLines = resp.text.trim().split('\n');
  const parsedItems: Array<Record<string, any>> = [];
  for (const jsonLine of jsonLines) {
    parsedItems.push(JSON.parse(jsonLine));
  }
  return parsedItems;
};
/**
 * FTR suite for the matrix DNS histogram search strategy.
 *
 * Loads the `large_dns_query` archive, posts a DNS matrix histogram request
 * against the `large_volume_dns_data` index and asserts the `dns_count`
 * aggregation value. When the first response reports `isRunning === true`, the
 * same request (plus the returned search `id`) is sent to `/internal/bsearch`
 * inside `retry.waitForWithTimeout` and the assertion is made on that result.
 */
export default function ({ getService }: FtrProviderContext) {
  const esArchiver = getService('esArchiver');
  const supertest = getService('supertest');
  const retry = getService('retry');

  describe('Matrix DNS Histogram', () => {
    describe('Large data set', () => {
      const ARCHIVE = 'security_solution/matrix_dns_histogram/large_dns_query';
      const FROM = '2000-01-01T00:00:00.000Z';
      const TO = '3000-01-01T00:00:00.000Z';
      const EXPECTED_DNS_COUNT = 6604;

      // Shared by the initial search and the bsearch follow-up. Key order is
      // kept as-is so both requests serialize exactly as they did before.
      const dnsHistogramRequest = {
        defaultIndex: ['large_volume_dns_data'],
        docValueFields: [],
        factoryQueryType: MatrixHistogramQuery,
        histogramType: MatrixHistogramType.dns,
        filterQuery: '{"bool":{"must":[],"filter":[{"match_all":{}}],"should":[],"must_not":[]}}',
        isPtrIncluded: false,
        timerange: {
          interval: '12h',
          to: TO,
          from: FROM,
        },
      };

      before(() => esArchiver.load(ARCHIVE));
      after(() => esArchiver.unload(ARCHIVE));

      it('Make sure that we get dns data without getting bucket errors when querying large volume of data', async () => {
        const initialResponse = await supertest
          .post('/internal/search/securitySolutionSearchStrategy/')
          .set('kbn-xsrf', 'true')
          .send(dnsHistogramRequest)
          .expect(200);
        const networkDns = initialResponse.body;

        // Search already finished: assert directly on the first response.
        if (networkDns.isRunning !== true) {
          expect(networkDns.isRunning).to.equal(false);
          expect(networkDns.rawResponse.aggregations.dns_count.value).to.equal(EXPECTED_DNS_COUNT);
          return;
        }

        // Search still in progress: ask bsearch for the result of the same
        // search, identified by the id from the first response.
        await retry.waitForWithTimeout('bsearch to give us results', 5000, async () => {
          const bsearchPayload = {
            batch: [
              {
                request: { id: networkDns.id, ...dnsHistogramRequest },
                options: {
                  strategy: 'securitySolutionSearchStrategy',
                },
              },
            ],
          };
          const resp = await supertest
            .post('/internal/bsearch')
            .set('kbn-xsrf', 'true')
            .send(bsearchPayload);
          const [firstItem] = parseBfetchResponse(resp);
          expect(firstItem.result.rawResponse.aggregations.dns_count.value).to.equal(
            EXPECTED_DNS_COUNT
          );
          return true;
        });
      });
    });
  });
}

View file

@ -0,0 +1,32 @@
This folder contains input test data for tests specific to the matrix DNS
search strategy. The data covers corner cases, bugs found on customer sites, and correctness checks. When possible, the mappings for these
types of tests should be small and concise but still ECS compliant. If the files are small, do not gzip them; if they are large, please gzip them.
If you have to maintain this test at some point, the following script might be
helpful for generating a large set of values:
```sh
#!/bin/sh
# Prints 6600 JSON "doc" records for the large_volume_dns_data index, one per
# loop iteration, each followed by a blank line. Record $i gets the id "$i" and
# the value "domain_$i" for both dns.question.registered_domain and
# dns.question.name.
#
# The counter uses a POSIX `while` loop because the shebang is /bin/sh.
# `{1..6600}` brace expansion is a bash/ksh/zsh extension; a strictly POSIX sh
# (for example dash) would run the loop body once with the literal string
# "{1..6600}" instead of counting.
i=1
while [ "$i" -le 6600 ]
do
  echo "{"
  echo " \"type\": \"doc\","
  echo " \"value\": {"
  echo " \"id\": \"$i\","
  echo " \"index\": \"large_volume_dns_data\","
  echo " \"source\": {"
  echo " \"@timestamp\": \"2020-10-28T05:00:53.000Z\","
  echo " \"dns\": {"
  echo " \"question\": {"
  echo " \"registered_domain\": \"domain_$i\","
  echo " \"name\": \"domain_$i\""
  echo " }"
  echo " }"
  echo " },"
  echo " \"type\": \"_doc\""
  echo " }"
  echo "}"
  echo ""
  i=$((i + 1))
done
```

View file

@ -0,0 +1,35 @@
{
"type": "index",
"value": {
"index": "large_volume_dns_data",
"mappings": {
"dynamic": "strict",
"properties": {
"@timestamp": {
"type": "date"
},
"dns": {
"properties": {
"question": {
"properties": {
"name": {
"type": "keyword"
},
"registered_domain": {
"type": "keyword"
}
}
}
}
}
}
},
"settings": {
"refresh_interval": "5s",
"index": {
"number_of_replicas": "1",
"number_of_shards": "1"
}
}
}
}