mirror of
https://github.com/elastic/kibana.git
synced 2025-04-24 09:48:58 -04:00
[ResponseOps] log error when ES Query rules find docs out of time range (#186332)
resolves https://github.com/elastic/kibana/issues/175980 ## Summary Adds a check with logging if an ES Query rule returns hits which fall outside the time range it's searching. This shouldn't ever happen, but seems to be happening on rare occaisons, so we wanted to add some diagnostics to try to help narrow down the problem. Note that the ES|QL flavor rule does not use this diagnostic, just search source (KQL) and query dsl. We check 3 things: - ensure the `dateStart` sent to fetch was valid - ensure the `dateEnd` sent to fetch was valid - ensure the relevant time fields in hits are within the dateStart/dateEnd range These produce three different error messages: `For rule '<rule-id>', hits were returned with invalid time range start date '<date>' from field '<field>' using query <query>` `For rule '<rule-id>', hits were returned with invalid time range end date '<date>' from field '<field>' using query <query>` `For rule '<rule-id>', the hit with date '<date>' from field '<field>' is outside the query time range. Query: <query>. Document: <document>` Each message has one tag on it: `query-result-out-of-time-range` ## To Verify To test invalid dateStart/dateEnd, hack the Kibana code to set the values to NaN's:d30da09707/x-pack/plugins/stack_alerts/server/rule_types/es_query/executor.ts (L263-L264)
For instance, change that to: const epochStart = new Date('x').getTime(); const epochEnd = new Date('y').getTime(); To test the invdivual document hits, first back out the change you made above - when those error, the checks we're testing below do not run. Hack the Kibana code to make the time out of range:d30da09707/x-pack/plugins/stack_alerts/server/rule_types/es_query/executor.ts (L294)
For instance, change that to: const epochDate = epochStart - 100 For both tests, create an es query rule - kql or dsl - make the relevant changes, and arrange for the rule to get hits each time. The relevant messages should be logged in the Kibana console when the rule runs. ### Checklist Delete any items that are not applicable to this PR. - [x] [Unit or functional tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html) were updated or added to match the most common scenarios --------- Co-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com>
This commit is contained in:
parent
948ba508a8
commit
e12e4496e0
5 changed files with 377 additions and 6 deletions
|
@ -882,6 +882,282 @@ describe('es_query executor', () => {
|
|||
expect(mockSetLimitReached).toHaveBeenCalledTimes(1);
|
||||
expect(mockSetLimitReached).toHaveBeenCalledWith(false);
|
||||
});
|
||||
|
||||
it('should log messages for hits with out-of-range dates for search source', async () => {
|
||||
const epoch = Date.now();
|
||||
const oneYear = 1000 * 60 * 60 * 24 * 365;
|
||||
const dateEarly = new Date(epoch - oneYear).toISOString();
|
||||
const dateStart = new Date(epoch - 1000).toISOString();
|
||||
const dateMiddle = new Date(epoch - 500).toISOString();
|
||||
const dateEnd = new Date(epoch).toISOString();
|
||||
const dateLate = new Date(epoch + oneYear).toISOString();
|
||||
|
||||
function getTimeRange() {
|
||||
return { dateStart, dateEnd };
|
||||
}
|
||||
|
||||
mockFetchSearchSourceQuery.mockResolvedValueOnce({
|
||||
parsedResults: {
|
||||
results: [
|
||||
{
|
||||
group: 'all documents',
|
||||
count: 3,
|
||||
hits: [
|
||||
{ _source: { '@timestamp': dateEarly, value: 1 } },
|
||||
{ _source: { '@timestamp': dateMiddle, value: 2 } },
|
||||
{ _source: { '@timestamp': dateLate, value: 3 } },
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
truncated: false,
|
||||
query: 'the query would go here',
|
||||
});
|
||||
|
||||
const executorOptions: ExecutorOptions<EsQueryRuleParams> = {
|
||||
...defaultExecutorOptions,
|
||||
getTimeRange,
|
||||
params: {
|
||||
...defaultProps,
|
||||
searchType: 'searchSource',
|
||||
timeField: '@timestamp',
|
||||
},
|
||||
};
|
||||
await executor(coreMock, executorOptions);
|
||||
|
||||
const allLogCalls = loggerMock.collect(logger);
|
||||
const messages: string[] = [];
|
||||
for (const parms of allLogCalls.error) {
|
||||
const message = parms.shift();
|
||||
messages.push(`${message}`);
|
||||
}
|
||||
|
||||
expect(messages).toEqual([
|
||||
`For rule 'test-rule-id', the hit with date '${dateEarly}' from field '@timestamp' is outside the query time range. Query: <\"the query would go here\">. Document: <{\"_source\":{\"@timestamp\":\"${dateEarly}\",\"value\":1}}>`,
|
||||
`For rule 'test-rule-id', the hit with date '${dateLate}' from field '@timestamp' is outside the query time range. Query: <\"the query would go here\">. Document: <{\"_source\":{\"@timestamp\":\"${dateLate}\",\"value\":3}}>`,
|
||||
]);
|
||||
expect(allLogCalls).toMatchInlineSnapshot(`
|
||||
Object {
|
||||
"debug": Array [],
|
||||
"error": Array [
|
||||
Array [
|
||||
Object {
|
||||
"tags": Array [
|
||||
"query-result-out-of-time-range",
|
||||
],
|
||||
},
|
||||
],
|
||||
Array [
|
||||
Object {
|
||||
"tags": Array [
|
||||
"query-result-out-of-time-range",
|
||||
],
|
||||
},
|
||||
],
|
||||
],
|
||||
"fatal": Array [],
|
||||
"info": Array [],
|
||||
"log": Array [],
|
||||
"trace": Array [],
|
||||
"warn": Array [],
|
||||
}
|
||||
`);
|
||||
});
|
||||
|
||||
it('should log messages for bad start / end dates for search source', async () => {
|
||||
function getTimeRange() {
|
||||
return { dateStart: 'x', dateEnd: 'y' };
|
||||
}
|
||||
|
||||
mockFetchSearchSourceQuery.mockResolvedValueOnce({
|
||||
parsedResults: {
|
||||
results: [
|
||||
{
|
||||
group: 'all documents',
|
||||
count: 1,
|
||||
hits: [{ _source: { '@timestamp': new Date().toISOString() } }],
|
||||
},
|
||||
],
|
||||
},
|
||||
truncated: false,
|
||||
query: 'the query would go here',
|
||||
});
|
||||
|
||||
const executorOptions: ExecutorOptions<EsQueryRuleParams> = {
|
||||
...defaultExecutorOptions,
|
||||
getTimeRange,
|
||||
params: {
|
||||
...defaultProps,
|
||||
searchType: 'searchSource',
|
||||
timeField: '@timestamp',
|
||||
},
|
||||
};
|
||||
await executor(coreMock, executorOptions);
|
||||
|
||||
const allLogCalls = loggerMock.collect(logger);
|
||||
const messages: string[] = [];
|
||||
for (const parms of allLogCalls.error) {
|
||||
const message = parms.shift();
|
||||
messages.push(`${message}`);
|
||||
}
|
||||
|
||||
expect(messages).toEqual([
|
||||
`For rule 'test-rule-id', hits were returned with invalid time range start date 'x' from field '@timestamp' using query <"the query would go here">`,
|
||||
`For rule 'test-rule-id', hits were returned with invalid time range end date 'y' from field '@timestamp' using query <"the query would go here">`,
|
||||
]);
|
||||
expect(allLogCalls.error).toMatchInlineSnapshot(`
|
||||
Array [
|
||||
Array [
|
||||
Object {
|
||||
"tags": Array [
|
||||
"query-result-out-of-time-range",
|
||||
],
|
||||
},
|
||||
],
|
||||
Array [
|
||||
Object {
|
||||
"tags": Array [
|
||||
"query-result-out-of-time-range",
|
||||
],
|
||||
},
|
||||
],
|
||||
]
|
||||
`);
|
||||
});
|
||||
|
||||
it('should log messages for hits with out-of-range dates for query dsl', async () => {
|
||||
const epoch = Date.now();
|
||||
const oneYear = 1000 * 60 * 60 * 24 * 365;
|
||||
const dateEarly = new Date(epoch - oneYear).toISOString();
|
||||
const dateStart = new Date(epoch - 1000).toISOString();
|
||||
const dateMiddle = new Date(epoch - 500).toISOString();
|
||||
const dateEnd = new Date(epoch).toISOString();
|
||||
const dateLate = new Date(epoch + oneYear).toISOString();
|
||||
|
||||
function getTimeRange() {
|
||||
return { dateStart, dateEnd };
|
||||
}
|
||||
|
||||
mockFetchEsQuery.mockResolvedValueOnce({
|
||||
parsedResults: {
|
||||
results: [
|
||||
{
|
||||
group: 'all documents',
|
||||
count: 3,
|
||||
hits: [
|
||||
{ _source: { '@timestamp': dateEarly, value: 1 } },
|
||||
{ _source: { '@timestamp': dateMiddle, value: 2 } },
|
||||
{ _source: { '@timestamp': dateLate, value: 3 } },
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
truncated: false,
|
||||
query: 'the query would go here',
|
||||
});
|
||||
|
||||
const executorOptions: ExecutorOptions<EsQueryRuleParams> = {
|
||||
...defaultExecutorOptions,
|
||||
getTimeRange,
|
||||
params: {
|
||||
...defaultProps,
|
||||
searchType: 'esQuery',
|
||||
timeField: '@timestamp',
|
||||
},
|
||||
};
|
||||
await executor(coreMock, executorOptions);
|
||||
|
||||
const allLogCalls = loggerMock.collect(logger);
|
||||
const messages: string[] = [];
|
||||
for (const parms of allLogCalls.error) {
|
||||
const message = parms.shift();
|
||||
messages.push(`${message}`);
|
||||
}
|
||||
|
||||
expect(messages).toEqual([
|
||||
`For rule 'test-rule-id', the hit with date '${dateEarly}' from field '@timestamp' is outside the query time range. Query: <\"the query would go here\">. Document: <{\"_source\":{\"@timestamp\":\"${dateEarly}\",\"value\":1}}>`,
|
||||
`For rule 'test-rule-id', the hit with date '${dateLate}' from field '@timestamp' is outside the query time range. Query: <\"the query would go here\">. Document: <{\"_source\":{\"@timestamp\":\"${dateLate}\",\"value\":3}}>`,
|
||||
]);
|
||||
expect(allLogCalls.error).toMatchInlineSnapshot(`
|
||||
Array [
|
||||
Array [
|
||||
Object {
|
||||
"tags": Array [
|
||||
"query-result-out-of-time-range",
|
||||
],
|
||||
},
|
||||
],
|
||||
Array [
|
||||
Object {
|
||||
"tags": Array [
|
||||
"query-result-out-of-time-range",
|
||||
],
|
||||
},
|
||||
],
|
||||
]
|
||||
`);
|
||||
});
|
||||
|
||||
it('should log messages for bad start / end dates for query dsl', async () => {
|
||||
function getTimeRange() {
|
||||
return { dateStart: 'x', dateEnd: 'y' };
|
||||
}
|
||||
|
||||
mockFetchEsQuery.mockResolvedValueOnce({
|
||||
parsedResults: {
|
||||
results: [
|
||||
{
|
||||
group: 'all documents',
|
||||
count: 1,
|
||||
hits: [{ _source: { '@timestamp': new Date().toISOString() } }],
|
||||
},
|
||||
],
|
||||
},
|
||||
truncated: false,
|
||||
query: 'the query would go here',
|
||||
});
|
||||
|
||||
const executorOptions: ExecutorOptions<EsQueryRuleParams> = {
|
||||
...defaultExecutorOptions,
|
||||
getTimeRange,
|
||||
params: {
|
||||
...defaultProps,
|
||||
searchType: 'esQuery',
|
||||
timeField: '@timestamp',
|
||||
},
|
||||
};
|
||||
await executor(coreMock, executorOptions);
|
||||
|
||||
const allLogCalls = loggerMock.collect(logger);
|
||||
const messages: string[] = [];
|
||||
for (const parms of allLogCalls.error) {
|
||||
const message = parms.shift();
|
||||
messages.push(`${message}`);
|
||||
}
|
||||
|
||||
expect(messages).toEqual([
|
||||
`For rule 'test-rule-id', hits were returned with invalid time range start date 'x' from field '@timestamp' using query <"the query would go here">`,
|
||||
`For rule 'test-rule-id', hits were returned with invalid time range end date 'y' from field '@timestamp' using query <"the query would go here">`,
|
||||
]);
|
||||
expect(allLogCalls.error).toMatchInlineSnapshot(`
|
||||
Array [
|
||||
Array [
|
||||
Object {
|
||||
"tags": Array [
|
||||
"query-result-out-of-time-range",
|
||||
],
|
||||
},
|
||||
],
|
||||
Array [
|
||||
Object {
|
||||
"tags": Array [
|
||||
"query-result-out-of-time-range",
|
||||
],
|
||||
},
|
||||
],
|
||||
]
|
||||
`);
|
||||
});
|
||||
});
|
||||
|
||||
describe('tryToParseAsDate', () => {
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
*/
|
||||
import { sha256 } from 'js-sha256';
|
||||
import { i18n } from '@kbn/i18n';
|
||||
import { CoreSetup } from '@kbn/core/server';
|
||||
import { CoreSetup, Logger } from '@kbn/core/server';
|
||||
import { isGroupAggregation, UngroupedGroupId } from '@kbn/triggers-actions-ui-plugin/common';
|
||||
import {
|
||||
ALERT_EVALUATION_THRESHOLD,
|
||||
|
@ -16,6 +16,9 @@ import {
|
|||
} from '@kbn/rule-data-utils';
|
||||
|
||||
import { AlertsClientError } from '@kbn/alerting-plugin/server';
|
||||
import { get } from 'lodash';
|
||||
import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
|
||||
|
||||
import { ComparatorFns } from '../../../common';
|
||||
import {
|
||||
addMessages,
|
||||
|
@ -71,7 +74,7 @@ export async function executor(core: CoreSetup, options: ExecutorOptions<EsQuery
|
|||
let latestTimestamp: string | undefined = tryToParseAsDate(state.latestTimestamp);
|
||||
const { dateStart, dateEnd } = getTimeRange(`${params.timeWindowSize}${params.timeWindowUnit}`);
|
||||
|
||||
const { parsedResults, link, index } = searchSourceRule
|
||||
const { parsedResults, link, index, query } = searchSourceRule
|
||||
? await fetchSearchSourceQuery({
|
||||
ruleId,
|
||||
alertLimit,
|
||||
|
@ -122,6 +125,19 @@ export async function executor(core: CoreSetup, options: ExecutorOptions<EsQuery
|
|||
const alertId = result.group;
|
||||
const value = result.value ?? result.count;
|
||||
|
||||
// check hits for dates out of range
|
||||
if (!esqlQueryRule) {
|
||||
checkHitsForDateOutOfRange(
|
||||
logger,
|
||||
ruleId,
|
||||
result.hits,
|
||||
params.timeField,
|
||||
dateStart,
|
||||
dateEnd,
|
||||
query
|
||||
);
|
||||
}
|
||||
|
||||
// group aggregations use the bucket selector agg to compare conditions
|
||||
// within the ES query, so only 'met' results are returned, therefore we don't need
|
||||
// to use the compareFn
|
||||
|
@ -231,6 +247,84 @@ export async function executor(core: CoreSetup, options: ExecutorOptions<EsQuery
|
|||
return { state: { latestTimestamp } };
|
||||
}
|
||||
|
||||
// diagnostic to help solve a puzzle of sometimes returning documents
|
||||
// not matching the expected time range; usually kql using ccs.
|
||||
function checkHitsForDateOutOfRange(
|
||||
logger: Logger,
|
||||
ruleId: string,
|
||||
hits: Array<estypes.SearchHit<unknown>>,
|
||||
timeField: string | undefined,
|
||||
dateStart: string,
|
||||
dateEnd: string,
|
||||
query: unknown
|
||||
) {
|
||||
if (!timeField) return;
|
||||
|
||||
const epochStart = new Date(dateStart).getTime();
|
||||
const epochEnd = new Date(dateEnd).getTime();
|
||||
const messageMeta = { tags: ['query-result-out-of-time-range'] };
|
||||
|
||||
const messagePrefix = `For rule '${ruleId}'`;
|
||||
const usingQuery = `using query <${JSON.stringify(query)}>`;
|
||||
const hitsWereReturned = 'hits were returned with invalid time range';
|
||||
|
||||
let errors = 0;
|
||||
if (isNaN(epochStart)) {
|
||||
errors++;
|
||||
logger.error(
|
||||
`${messagePrefix}, ${hitsWereReturned} start date '${dateStart}' from field '${timeField}' ${usingQuery}`,
|
||||
messageMeta
|
||||
);
|
||||
}
|
||||
|
||||
if (isNaN(epochEnd)) {
|
||||
errors++;
|
||||
logger.error(
|
||||
`${messagePrefix}, ${hitsWereReturned} end date '${dateEnd}' from field '${timeField}' ${usingQuery}`,
|
||||
messageMeta
|
||||
);
|
||||
}
|
||||
|
||||
if (errors > 0) return;
|
||||
|
||||
const outsideTimeRange = 'outside the query time range';
|
||||
|
||||
for (const hit of hits) {
|
||||
const dateVal = get(hit, `_source.${timeField}`);
|
||||
const epochDate = getEpochDateFromString(dateVal);
|
||||
|
||||
if (epochDate) {
|
||||
if (epochDate < epochStart || epochDate > epochEnd) {
|
||||
const message = `the hit with date '${dateVal}' from field '${timeField}' is ${outsideTimeRange}`;
|
||||
const queryString = `Query: <${JSON.stringify(query)}>`;
|
||||
const document = `Document: <${JSON.stringify(hit)}>`;
|
||||
logger.error(`${messagePrefix}, ${message}. ${queryString}. ${document}`, messageMeta);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function getEpochDateFromString(dateString: string): number | null {
|
||||
let date: Date;
|
||||
try {
|
||||
date = new Date(dateString);
|
||||
} catch (e) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const time = date.getTime();
|
||||
if (!isNaN(time)) return time;
|
||||
|
||||
// if not a valid date string, try it as a stringified number
|
||||
const dateNum = parseInt(dateString, 10);
|
||||
if (isNaN(dateNum)) return null;
|
||||
|
||||
const timeFromNumber = new Date(dateNum).getTime();
|
||||
if (isNaN(timeFromNumber)) return null;
|
||||
|
||||
return timeFromNumber;
|
||||
}
|
||||
|
||||
export function getValidTimefieldSort(
|
||||
sortValues: Array<string | number | null> = []
|
||||
): undefined | string {
|
||||
|
|
|
@ -145,6 +145,7 @@ export async function fetchEsQuery({
|
|||
sourceFieldsParams: params.sourceFields,
|
||||
}),
|
||||
link,
|
||||
query: sortedQuery,
|
||||
index: params.index,
|
||||
};
|
||||
}
|
||||
|
|
|
@ -66,6 +66,7 @@ export async function fetchEsqlQuery({
|
|||
|
||||
return {
|
||||
link,
|
||||
query,
|
||||
numMatches: Number(response.values.length),
|
||||
parsedResults: parseAggregationResults({
|
||||
isCountAgg: true,
|
||||
|
|
|
@ -69,11 +69,9 @@ export async function fetchSearchSourceQuery({
|
|||
alertLimit
|
||||
);
|
||||
|
||||
const searchRequestBody: unknown = searchSource.getSearchRequestBody();
|
||||
logger.debug(
|
||||
() =>
|
||||
`search source query rule (${ruleId}) query: ${JSON.stringify(
|
||||
searchSource.getSearchRequestBody()
|
||||
)}`
|
||||
() => `search source query rule (${ruleId}) query: ${JSON.stringify(searchRequestBody)}`
|
||||
);
|
||||
|
||||
const searchResult = await searchSource.fetch();
|
||||
|
@ -99,6 +97,7 @@ export async function fetchSearchSourceQuery({
|
|||
sourceFieldsParams: params.sourceFields,
|
||||
}),
|
||||
index: [index.name],
|
||||
query: searchRequestBody,
|
||||
};
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue