[ResponseOps] log error when ES Query rules find docs out of time range (#186332)

resolves https://github.com/elastic/kibana/issues/175980

## Summary

Adds a check with logging if an ES Query rule returns hits which fall
outside the time range it's searching. This shouldn't ever happen, but
seems to be happening on rare occaisons, so we wanted to add some
diagnostics to try to help narrow down the problem.

Note that the ES|QL flavor rule does not use this diagnostic, just
search source (KQL) and query dsl.

We check 3 things:
- ensure the `dateStart` sent to fetch was valid
- ensure the `dateEnd` sent to fetch was valid
- ensure the relevant time fields in hits are within the
dateStart/dateEnd range

These produce three different error messages:

`For rule '<rule-id>', hits were returned with invalid time range start
date '<date>' from field '<field>' using query <query>`

`For rule '<rule-id>', hits were returned with invalid time range end
date '<date>' from field '<field>' using query <query>`

`For rule '<rule-id>', the hit with date '<date>' from field '<field>'
is outside the query time range. Query: <query>. Document: <document>`

Each message has one tag on it: `query-result-out-of-time-range`

## To Verify

To test invalid dateStart/dateEnd, hack the Kibana code to set the
values to NaN's:


d30da09707/x-pack/plugins/stack_alerts/server/rule_types/es_query/executor.ts (L263-L264)

For instance, change that to:

    const epochStart = new Date('x').getTime();
    const epochEnd = new Date('y').getTime();

To test the invdivual document hits, first back out the change you made
above - when those error, the checks we're testing below do not run.
Hack the Kibana code to make the time out of range:


d30da09707/x-pack/plugins/stack_alerts/server/rule_types/es_query/executor.ts (L294)

For instance, change that to:

    const epochDate = epochStart - 100

For both tests, create an es query rule - kql or dsl - make the relevant
changes, and arrange for the rule to get hits each time. The relevant
messages should be logged in the Kibana console when the rule runs.

### Checklist

Delete any items that are not applicable to this PR.

- [x] [Unit or functional
tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)
were updated or added to match the most common scenarios

---------

Co-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com>
This commit is contained in:
Patrick Mueller 2024-07-23 16:17:11 -04:00 committed by GitHub
parent 948ba508a8
commit e12e4496e0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 377 additions and 6 deletions

View file

@ -882,6 +882,282 @@ describe('es_query executor', () => {
expect(mockSetLimitReached).toHaveBeenCalledTimes(1);
expect(mockSetLimitReached).toHaveBeenCalledWith(false);
});
it('should log messages for hits with out-of-range dates for search source', async () => {
const epoch = Date.now();
const oneYear = 1000 * 60 * 60 * 24 * 365;
const dateEarly = new Date(epoch - oneYear).toISOString();
const dateStart = new Date(epoch - 1000).toISOString();
const dateMiddle = new Date(epoch - 500).toISOString();
const dateEnd = new Date(epoch).toISOString();
const dateLate = new Date(epoch + oneYear).toISOString();
function getTimeRange() {
return { dateStart, dateEnd };
}
mockFetchSearchSourceQuery.mockResolvedValueOnce({
parsedResults: {
results: [
{
group: 'all documents',
count: 3,
hits: [
{ _source: { '@timestamp': dateEarly, value: 1 } },
{ _source: { '@timestamp': dateMiddle, value: 2 } },
{ _source: { '@timestamp': dateLate, value: 3 } },
],
},
],
},
truncated: false,
query: 'the query would go here',
});
const executorOptions: ExecutorOptions<EsQueryRuleParams> = {
...defaultExecutorOptions,
getTimeRange,
params: {
...defaultProps,
searchType: 'searchSource',
timeField: '@timestamp',
},
};
await executor(coreMock, executorOptions);
const allLogCalls = loggerMock.collect(logger);
const messages: string[] = [];
for (const parms of allLogCalls.error) {
const message = parms.shift();
messages.push(`${message}`);
}
expect(messages).toEqual([
`For rule 'test-rule-id', the hit with date '${dateEarly}' from field '@timestamp' is outside the query time range. Query: <\"the query would go here\">. Document: <{\"_source\":{\"@timestamp\":\"${dateEarly}\",\"value\":1}}>`,
`For rule 'test-rule-id', the hit with date '${dateLate}' from field '@timestamp' is outside the query time range. Query: <\"the query would go here\">. Document: <{\"_source\":{\"@timestamp\":\"${dateLate}\",\"value\":3}}>`,
]);
expect(allLogCalls).toMatchInlineSnapshot(`
Object {
"debug": Array [],
"error": Array [
Array [
Object {
"tags": Array [
"query-result-out-of-time-range",
],
},
],
Array [
Object {
"tags": Array [
"query-result-out-of-time-range",
],
},
],
],
"fatal": Array [],
"info": Array [],
"log": Array [],
"trace": Array [],
"warn": Array [],
}
`);
});
it('should log messages for bad start / end dates for search source', async () => {
function getTimeRange() {
return { dateStart: 'x', dateEnd: 'y' };
}
mockFetchSearchSourceQuery.mockResolvedValueOnce({
parsedResults: {
results: [
{
group: 'all documents',
count: 1,
hits: [{ _source: { '@timestamp': new Date().toISOString() } }],
},
],
},
truncated: false,
query: 'the query would go here',
});
const executorOptions: ExecutorOptions<EsQueryRuleParams> = {
...defaultExecutorOptions,
getTimeRange,
params: {
...defaultProps,
searchType: 'searchSource',
timeField: '@timestamp',
},
};
await executor(coreMock, executorOptions);
const allLogCalls = loggerMock.collect(logger);
const messages: string[] = [];
for (const parms of allLogCalls.error) {
const message = parms.shift();
messages.push(`${message}`);
}
expect(messages).toEqual([
`For rule 'test-rule-id', hits were returned with invalid time range start date 'x' from field '@timestamp' using query <"the query would go here">`,
`For rule 'test-rule-id', hits were returned with invalid time range end date 'y' from field '@timestamp' using query <"the query would go here">`,
]);
expect(allLogCalls.error).toMatchInlineSnapshot(`
Array [
Array [
Object {
"tags": Array [
"query-result-out-of-time-range",
],
},
],
Array [
Object {
"tags": Array [
"query-result-out-of-time-range",
],
},
],
]
`);
});
it('should log messages for hits with out-of-range dates for query dsl', async () => {
const epoch = Date.now();
const oneYear = 1000 * 60 * 60 * 24 * 365;
const dateEarly = new Date(epoch - oneYear).toISOString();
const dateStart = new Date(epoch - 1000).toISOString();
const dateMiddle = new Date(epoch - 500).toISOString();
const dateEnd = new Date(epoch).toISOString();
const dateLate = new Date(epoch + oneYear).toISOString();
function getTimeRange() {
return { dateStart, dateEnd };
}
mockFetchEsQuery.mockResolvedValueOnce({
parsedResults: {
results: [
{
group: 'all documents',
count: 3,
hits: [
{ _source: { '@timestamp': dateEarly, value: 1 } },
{ _source: { '@timestamp': dateMiddle, value: 2 } },
{ _source: { '@timestamp': dateLate, value: 3 } },
],
},
],
},
truncated: false,
query: 'the query would go here',
});
const executorOptions: ExecutorOptions<EsQueryRuleParams> = {
...defaultExecutorOptions,
getTimeRange,
params: {
...defaultProps,
searchType: 'esQuery',
timeField: '@timestamp',
},
};
await executor(coreMock, executorOptions);
const allLogCalls = loggerMock.collect(logger);
const messages: string[] = [];
for (const parms of allLogCalls.error) {
const message = parms.shift();
messages.push(`${message}`);
}
expect(messages).toEqual([
`For rule 'test-rule-id', the hit with date '${dateEarly}' from field '@timestamp' is outside the query time range. Query: <\"the query would go here\">. Document: <{\"_source\":{\"@timestamp\":\"${dateEarly}\",\"value\":1}}>`,
`For rule 'test-rule-id', the hit with date '${dateLate}' from field '@timestamp' is outside the query time range. Query: <\"the query would go here\">. Document: <{\"_source\":{\"@timestamp\":\"${dateLate}\",\"value\":3}}>`,
]);
expect(allLogCalls.error).toMatchInlineSnapshot(`
Array [
Array [
Object {
"tags": Array [
"query-result-out-of-time-range",
],
},
],
Array [
Object {
"tags": Array [
"query-result-out-of-time-range",
],
},
],
]
`);
});
it('should log messages for bad start / end dates for query dsl', async () => {
function getTimeRange() {
return { dateStart: 'x', dateEnd: 'y' };
}
mockFetchEsQuery.mockResolvedValueOnce({
parsedResults: {
results: [
{
group: 'all documents',
count: 1,
hits: [{ _source: { '@timestamp': new Date().toISOString() } }],
},
],
},
truncated: false,
query: 'the query would go here',
});
const executorOptions: ExecutorOptions<EsQueryRuleParams> = {
...defaultExecutorOptions,
getTimeRange,
params: {
...defaultProps,
searchType: 'esQuery',
timeField: '@timestamp',
},
};
await executor(coreMock, executorOptions);
const allLogCalls = loggerMock.collect(logger);
const messages: string[] = [];
for (const parms of allLogCalls.error) {
const message = parms.shift();
messages.push(`${message}`);
}
expect(messages).toEqual([
`For rule 'test-rule-id', hits were returned with invalid time range start date 'x' from field '@timestamp' using query <"the query would go here">`,
`For rule 'test-rule-id', hits were returned with invalid time range end date 'y' from field '@timestamp' using query <"the query would go here">`,
]);
expect(allLogCalls.error).toMatchInlineSnapshot(`
Array [
Array [
Object {
"tags": Array [
"query-result-out-of-time-range",
],
},
],
Array [
Object {
"tags": Array [
"query-result-out-of-time-range",
],
},
],
]
`);
});
});
describe('tryToParseAsDate', () => {

View file

@ -6,7 +6,7 @@
*/
import { sha256 } from 'js-sha256';
import { i18n } from '@kbn/i18n';
import { CoreSetup } from '@kbn/core/server';
import { CoreSetup, Logger } from '@kbn/core/server';
import { isGroupAggregation, UngroupedGroupId } from '@kbn/triggers-actions-ui-plugin/common';
import {
ALERT_EVALUATION_THRESHOLD,
@ -16,6 +16,9 @@ import {
} from '@kbn/rule-data-utils';
import { AlertsClientError } from '@kbn/alerting-plugin/server';
import { get } from 'lodash';
import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import { ComparatorFns } from '../../../common';
import {
addMessages,
@ -71,7 +74,7 @@ export async function executor(core: CoreSetup, options: ExecutorOptions<EsQuery
let latestTimestamp: string | undefined = tryToParseAsDate(state.latestTimestamp);
const { dateStart, dateEnd } = getTimeRange(`${params.timeWindowSize}${params.timeWindowUnit}`);
const { parsedResults, link, index } = searchSourceRule
const { parsedResults, link, index, query } = searchSourceRule
? await fetchSearchSourceQuery({
ruleId,
alertLimit,
@ -122,6 +125,19 @@ export async function executor(core: CoreSetup, options: ExecutorOptions<EsQuery
const alertId = result.group;
const value = result.value ?? result.count;
// check hits for dates out of range
if (!esqlQueryRule) {
checkHitsForDateOutOfRange(
logger,
ruleId,
result.hits,
params.timeField,
dateStart,
dateEnd,
query
);
}
// group aggregations use the bucket selector agg to compare conditions
// within the ES query, so only 'met' results are returned, therefore we don't need
// to use the compareFn
@ -231,6 +247,84 @@ export async function executor(core: CoreSetup, options: ExecutorOptions<EsQuery
return { state: { latestTimestamp } };
}
// diagnostic to help solve a puzzle of sometimes returning documents
// not matching the expected time range; usually kql using ccs.
function checkHitsForDateOutOfRange(
logger: Logger,
ruleId: string,
hits: Array<estypes.SearchHit<unknown>>,
timeField: string | undefined,
dateStart: string,
dateEnd: string,
query: unknown
) {
if (!timeField) return;
const epochStart = new Date(dateStart).getTime();
const epochEnd = new Date(dateEnd).getTime();
const messageMeta = { tags: ['query-result-out-of-time-range'] };
const messagePrefix = `For rule '${ruleId}'`;
const usingQuery = `using query <${JSON.stringify(query)}>`;
const hitsWereReturned = 'hits were returned with invalid time range';
let errors = 0;
if (isNaN(epochStart)) {
errors++;
logger.error(
`${messagePrefix}, ${hitsWereReturned} start date '${dateStart}' from field '${timeField}' ${usingQuery}`,
messageMeta
);
}
if (isNaN(epochEnd)) {
errors++;
logger.error(
`${messagePrefix}, ${hitsWereReturned} end date '${dateEnd}' from field '${timeField}' ${usingQuery}`,
messageMeta
);
}
if (errors > 0) return;
const outsideTimeRange = 'outside the query time range';
for (const hit of hits) {
const dateVal = get(hit, `_source.${timeField}`);
const epochDate = getEpochDateFromString(dateVal);
if (epochDate) {
if (epochDate < epochStart || epochDate > epochEnd) {
const message = `the hit with date '${dateVal}' from field '${timeField}' is ${outsideTimeRange}`;
const queryString = `Query: <${JSON.stringify(query)}>`;
const document = `Document: <${JSON.stringify(hit)}>`;
logger.error(`${messagePrefix}, ${message}. ${queryString}. ${document}`, messageMeta);
}
}
}
}
function getEpochDateFromString(dateString: string): number | null {
let date: Date;
try {
date = new Date(dateString);
} catch (e) {
return null;
}
const time = date.getTime();
if (!isNaN(time)) return time;
// if not a valid date string, try it as a stringified number
const dateNum = parseInt(dateString, 10);
if (isNaN(dateNum)) return null;
const timeFromNumber = new Date(dateNum).getTime();
if (isNaN(timeFromNumber)) return null;
return timeFromNumber;
}
export function getValidTimefieldSort(
sortValues: Array<string | number | null> = []
): undefined | string {

View file

@ -145,6 +145,7 @@ export async function fetchEsQuery({
sourceFieldsParams: params.sourceFields,
}),
link,
query: sortedQuery,
index: params.index,
};
}

View file

@ -66,6 +66,7 @@ export async function fetchEsqlQuery({
return {
link,
query,
numMatches: Number(response.values.length),
parsedResults: parseAggregationResults({
isCountAgg: true,

View file

@ -69,11 +69,9 @@ export async function fetchSearchSourceQuery({
alertLimit
);
const searchRequestBody: unknown = searchSource.getSearchRequestBody();
logger.debug(
() =>
`search source query rule (${ruleId}) query: ${JSON.stringify(
searchSource.getSearchRequestBody()
)}`
() => `search source query rule (${ruleId}) query: ${JSON.stringify(searchRequestBody)}`
);
const searchResult = await searchSource.fetch();
@ -99,6 +97,7 @@ export async function fetchSearchSourceQuery({
sourceFieldsParams: params.sourceFields,
}),
index: [index.name],
query: searchRequestBody,
};
}