[Response Ops][Alerting] Expose shard failures from querying over CCS indices in ES query rule type (#189312)

This commit is contained in:
Ying Mao 2024-09-05 21:48:46 -04:00 committed by GitHub
parent ffda01784d
commit 4a0919a7ea
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 657 additions and 8 deletions

View file

@ -188,6 +188,53 @@ describe('RuleExecutionStatus', () => {
expect(status.error).toEqual({ message: 'an error', reason: 'unknown' });
expect(status.warning).toBe(undefined);
});
// Only the rule executor reports a warning here (through the rule result service), so that
// warning is expected to surface on the execution status with the EXECUTION reason.
test('task state with rule execution warning', () => {
  const ruleResultService = new RuleResultService();
  const lastRunSetters = ruleResultService.getLastRunSetters();
  lastRunSetters.addLastRunWarning('a rule execution warning');
  const { status, metrics } = executionStatusFromState({
    stateWithMetrics: {
      alertInstances: { a: {} },
      metrics: executionMetrics,
    },
    ruleResultService,
  });
  checkDateIsNearNow(status.lastExecutionDate);
  expect(status.warning).toEqual({
    message: `a rule execution warning`,
    reason: RuleExecutionStatusWarningReasons.EXECUTION,
  });
  expect(status.status).toBe('warning');
  expect(status.error).toBe(undefined);
  testExpectedMetrics(metrics!, executionMetrics);
});
// The rule executor reports a warning AND the metrics carry a PARTIAL triggered-actions status.
// The expected warning is the max-executable-actions one, i.e. the rule's own warning must not
// replace it.
test('task state with framework warning and rule execution warning - only show framework warning', () => {
  const ruleResultService = new RuleResultService();
  const lastRunSetters = ruleResultService.getLastRunSetters();
  lastRunSetters.addLastRunWarning('a rule execution warning');
  const { status, metrics } = executionStatusFromState({
    stateWithMetrics: {
      alertInstances: { a: {} },
      metrics: { ...executionMetrics, triggeredActionsStatus: ActionsCompletion.PARTIAL },
    },
    ruleResultService,
  });
  checkDateIsNearNow(status.lastExecutionDate);
  expect(status.warning).toEqual({
    message: translations.taskRunner.warning.maxExecutableActions,
    reason: RuleExecutionStatusWarningReasons.MAX_EXECUTABLE_ACTIONS,
  });
  expect(status.status).toBe('warning');
  expect(status.error).toBe(undefined);
  testExpectedMetrics(metrics!, {
    ...executionMetrics,
    triggeredActionsStatus: ActionsCompletion.PARTIAL,
  });
});
});
describe('executionStatusFromError()', () => {

View file

@ -70,7 +70,8 @@ export function executionStatusFromState({
}
// Overwrite status to be error if last run reported any errors
const { errors: errorsFromLastRun } = ruleResultService.getLastRunResults();
const { errors: errorsFromLastRun, warnings: warningsFromLastRun } =
ruleResultService.getLastRunResults();
if (errorsFromLastRun.length > 0) {
status = RuleExecutionStatusValues[2];
// These errors are reported by ruleResultService.addLastRunError, therefore they are landed in successful execution map
@ -80,6 +81,15 @@ export function executionStatusFromState({
};
}
// Set warning status if last run reported any warnings and framework has not set any warnings
if (warningsFromLastRun.length > 0 && !warning) {
status = RuleExecutionStatusValues[5];
warning = {
reason: RuleExecutionStatusWarningReasons.EXECUTION,
message: warningsFromLastRun.join(','),
};
}
return {
status: {
lastExecutionDate: lastExecutionDate ?? new Date(),

View file

@ -53,7 +53,7 @@ export async function executor(core: CoreSetup, options: ExecutorOptions<EsQuery
logger,
getTimeRange,
} = options;
const { alertsClient, scopedClusterClient, share } = services;
const { alertsClient, ruleResultService, scopedClusterClient, share } = services;
if (!alertsClient) {
throw new AlertsClientError();
@ -89,6 +89,7 @@ export async function executor(core: CoreSetup, options: ExecutorOptions<EsQuery
getSearchSourceClient: services.getSearchSourceClient,
logger,
getDataViews: services.getDataViews,
ruleResultService,
},
dateStart,
dateEnd,
@ -119,6 +120,7 @@ export async function executor(core: CoreSetup, options: ExecutorOptions<EsQuery
services: {
scopedClusterClient,
logger,
ruleResultService,
},
dateStart,
dateEnd,

View file

@ -9,7 +9,9 @@ import { OnlyEsQueryRuleParams } from '../types';
import { Comparator } from '../../../../common/comparator_types';
import { fetchEsQuery } from './fetch_es_query';
import { elasticsearchServiceMock } from '@kbn/core-elasticsearch-server-mocks';
import { elasticsearchClientMock } from '@kbn/core-elasticsearch-client-server-mocks';
import { loggerMock } from '@kbn/logging-mocks';
import { publicRuleResultServiceMock } from '@kbn/alerting-plugin/server/monitoring/rule_result_service.mock';
jest.mock('@kbn/triggers-actions-ui-plugin/common', () => {
const actual = jest.requireActual('@kbn/triggers-actions-ui-plugin/common');
@ -37,6 +39,7 @@ const defaultParams: OnlyEsQueryRuleParams = {
const logger = loggerMock.create();
const scopedClusterClientMock = elasticsearchServiceMock.createScopedClusterClient();
const mockRuleResultService = publicRuleResultServiceMock.create();
describe('fetchEsQuery', () => {
beforeAll(() => {
@ -52,6 +55,7 @@ describe('fetchEsQuery', () => {
const services = {
scopedClusterClient: scopedClusterClientMock,
logger,
ruleResultService: mockRuleResultService,
};
it('should add time filter if timestamp if defined and excludeHitsFromPreviousRun is true', async () => {
const params = defaultParams;
@ -479,4 +483,139 @@ describe('fetchEsQuery', () => {
{ meta: true }
);
});
it('should bubble up CCS errors stored in the _shards field of the search result', async () => {
  // Failure reason carried by the mocked shard failure; the same text is expected to be
  // forwarded to the rule result service.
  const topHitsWindowError =
    "Top hits result window is too large, the top hits aggregator [topHitsAgg]'s from + size must be less than or equal to: [100] but was [300]. This limit can be set by changing the [index.max_inner_result_window] index level setting.";

  // The search itself resolves successfully; the failure is only visible inside `_shards`.
  scopedClusterClientMock.asCurrentUser.search.mockResolvedValueOnce(
    elasticsearchClientMock.createSuccessTransportRequestPromise({
      took: 16,
      timed_out: false,
      _shards: {
        total: 51,
        successful: 48,
        skipped: 48,
        failed: 3,
        failures: [
          {
            shard: 0,
            index: 'ccs-index',
            node: '8jMc8jz-Q6qFmKZXfijt-A',
            reason: {
              type: 'illegal_argument_exception',
              reason: topHitsWindowError,
            },
          },
        ],
      },
      hits: {
        total: {
          value: 0,
          relation: 'eq',
        },
        max_score: 0,
        hits: [],
      },
    })
  );

  await fetchEsQuery({
    ruleId: 'abc',
    name: 'test-rule',
    params: defaultParams,
    timestamp: '2020-02-09T23:15:41.941Z',
    services,
    spacePrefix: '',
    publicBaseUrl: '',
    dateStart: new Date().toISOString(),
    dateEnd: new Date().toISOString(),
  });

  expect(mockRuleResultService.addLastRunWarning).toHaveBeenCalledWith(topHitsWindowError);
  expect(mockRuleResultService.setLastRunOutcomeMessage).toHaveBeenCalledWith(topHitsWindowError);
});
it('should bubble up CCS errors stored in the _clusters field of the search result', async () => {
  // The rule result service mock is shared by every test in this file and the expected message
  // is identical to the one used by the `_shards` test. Clear recorded calls first so the
  // assertions below can only be satisfied by the call made in this test.
  jest.clearAllMocks();

  // Root cause nested under the skipped cluster's failure; expected to be forwarded as-is.
  const topHitsWindowError =
    "Top hits result window is too large, the top hits aggregator [topHitsAgg]'s from + size must be less than or equal to: [100] but was [300]. This limit can be set by changing the [index.max_inner_result_window] index level setting.";

  scopedClusterClientMock.asCurrentUser.search.mockResolvedValueOnce(
    // @ts-expect-error - _clusters.details not a valid response but it is irl
    elasticsearchClientMock.createSuccessTransportRequestPromise({
      took: 6,
      timed_out: false,
      num_reduce_phases: 0,
      _shards: { total: 0, successful: 0, skipped: 0, failed: 0 },
      _clusters: {
        total: 1,
        successful: 0,
        skipped: 1,
        running: 0,
        partial: 0,
        failed: 0,
        details: {
          test: {
            status: 'skipped',
            indices: '.kibana-event-log*',
            timed_out: false,
            failures: [
              {
                shard: -1,
                index: null,
                reason: {
                  type: 'search_phase_execution_exception',
                  reason: 'all shards failed',
                  phase: 'query',
                  grouped: true,
                  failed_shards: [
                    {
                      shard: 0,
                      index: 'test:.ds-.kibana-event-log-ds-2024.07.31-000001',
                      node: 'X1aMu4BpQR-7PHi-bEI8Fw',
                      reason: {
                        type: 'illegal_argument_exception',
                        reason: topHitsWindowError,
                      },
                    },
                  ],
                  caused_by: {
                    type: '',
                    reason: topHitsWindowError,
                    caused_by: {
                      type: 'illegal_argument_exception',
                      reason: topHitsWindowError,
                    },
                  },
                },
              },
            ],
          },
        },
      },
      hits: { total: { value: 0, relation: 'eq' }, max_score: 0, hits: [] },
    })
  );

  await fetchEsQuery({
    ruleId: 'abc',
    name: 'test-rule',
    params: defaultParams,
    timestamp: '2020-02-09T23:15:41.941Z',
    services,
    spacePrefix: '',
    publicBaseUrl: '',
    dateStart: new Date().toISOString(),
    dateEnd: new Date().toISOString(),
  });

  expect(mockRuleResultService.addLastRunWarning).toHaveBeenCalledWith(topHitsWindowError);
  expect(mockRuleResultService.setLastRunOutcomeMessage).toHaveBeenCalledWith(topHitsWindowError);
});
});

View file

@ -13,10 +13,11 @@ import {
} from '@kbn/triggers-actions-ui-plugin/common';
import { isGroupAggregation } from '@kbn/triggers-actions-ui-plugin/common';
import { ES_QUERY_ID } from '@kbn/rule-data-utils';
import { PublicRuleResultService } from '@kbn/alerting-plugin/server/types';
import { getComparatorScript } from '../../../../common';
import { OnlyEsQueryRuleParams } from '../types';
import { buildSortedEventsQuery } from '../../../../common/build_sorted_events_query';
import { getParsedQuery } from '../util';
import { getParsedQuery, checkForShardFailures } from '../util';
export interface FetchEsQueryOpts {
ruleId: string;
@ -28,6 +29,7 @@ export interface FetchEsQueryOpts {
services: {
scopedClusterClient: IScopedClusterClient;
logger: Logger;
ruleResultService?: PublicRuleResultService;
};
alertLimit?: number;
dateStart: string;
@ -49,7 +51,7 @@ export async function fetchEsQuery({
dateStart,
dateEnd,
}: FetchEsQueryOpts) {
const { scopedClusterClient, logger } = services;
const { scopedClusterClient, logger, ruleResultService } = services;
const esClient = scopedClusterClient.asCurrentUser;
const isGroupAgg = isGroupAggregation(params.termField);
const isCountAgg = isCountAggregation(params.aggType);
@ -135,6 +137,14 @@ export async function fetchEsQuery({
` es query rule ${ES_QUERY_ID}:${ruleId} "${name}" result - ${JSON.stringify(searchResult)}`
);
// result against CCS indices will return success response with errors nested within
// the _shards or _clusters field; look for these errors and bubble them up
const anyShardFailures = checkForShardFailures(searchResult);
if (anyShardFailures && ruleResultService) {
ruleResultService.addLastRunWarning(anyShardFailures);
ruleResultService.setLastRunOutcomeMessage(anyShardFailures);
}
const link = `${publicBaseUrl}${spacePrefix}/app/management/insightsAndAlerting/triggersActions/rule/${ruleId}`;
return {

View file

@ -6,8 +6,11 @@
*/
import { OnlySearchSourceRuleParams } from '../types';
import {
createSearchSourceMock,
searchSourceInstanceMock,
} from '@kbn/data-plugin/common/search/search_source/mocks';
import { searchSourceCommonMock } from '@kbn/data-plugin/common/search/search_source/mocks';
import { createSearchSourceMock } from '@kbn/data-plugin/common/search/search_source/mocks';
import { loggerMock } from '@kbn/logging-mocks';
import {
updateSearchSource,
@ -26,6 +29,7 @@ import { Comparator } from '../../../../common/comparator_types';
import { dataViewPluginMocks } from '@kbn/data-views-plugin/public/mocks';
import { DiscoverAppLocatorParams } from '@kbn/discover-plugin/common';
import { LocatorPublic } from '@kbn/share-plugin/common';
import { publicRuleResultServiceMock } from '@kbn/alerting-plugin/server/monitoring/rule_result_service.mock';
import { SavedObjectsErrorHelpers } from '@kbn/core-saved-objects-server';
import {
getErrorSource,
@ -72,12 +76,13 @@ const defaultParams: OnlySearchSourceRuleParams = {
};
const logger = loggerMock.create();
const mockRuleResultService = publicRuleResultServiceMock.create();
describe('fetchSearchSourceQuery', () => {
const dataViewMock = createDataView();
afterAll(() => {
jest.resetAllMocks();
jest.clearAllMocks();
});
const fakeNow = new Date('2020-02-09T23:15:41.941Z');
@ -431,6 +436,204 @@ describe('fetchSearchSourceQuery', () => {
expect(logger.warn).toHaveBeenCalledWith('Top hits size is capped at 100');
});
it('should bubble up CCS errors stored in the _shards field of the search result', async () => {
  // Failure reason carried by the mocked shard failure; the same text is expected to be
  // forwarded to the rule result service.
  const topHitsWindowError =
    "Top hits result window is too large, the top hits aggregator [topHitsAgg]'s from + size must be less than or equal to: [100] but was [300]. This limit can be set by changing the [index.max_inner_result_window] index level setting.";

  // A successful search response whose only sign of trouble is the `_shards.failures` entry.
  const response = {
    took: 16,
    timed_out: false,
    _shards: {
      total: 51,
      successful: 48,
      skipped: 48,
      failed: 3,
      failures: [
        {
          shard: 0,
          index: 'ccs-index',
          node: '8jMc8jz-Q6qFmKZXfijt-A',
          reason: {
            type: 'illegal_argument_exception',
            reason: topHitsWindowError,
          },
        },
      ],
    },
    hits: {
      total: {
        value: 0,
        relation: 'eq',
      },
      max_score: 0,
      hits: [],
    },
  };

  // One-shot behaviour for the shared search source instance used by this test.
  (searchSourceInstanceMock.getField as jest.Mock).mockImplementationOnce(() => dataViewMock);
  (searchSourceInstanceMock.setField as jest.Mock).mockImplementationOnce(() => undefined);
  (searchSourceInstanceMock.createChild as jest.Mock).mockImplementationOnce(
    () => searchSourceInstanceMock
  );
  (searchSourceInstanceMock.fetch as jest.Mock).mockImplementationOnce(() => response);
  searchSourceCommonMock.createLazy.mockResolvedValueOnce(searchSourceInstanceMock);

  await fetchSearchSourceQuery({
    ruleId: 'abc',
    params: defaultParams,
    services: {
      logger,
      getSearchSourceClient: async () => searchSourceCommonMock,
      ruleResultService: mockRuleResultService,
      share: {
        url: {
          // @ts-expect-error
          locators: {
            get: jest.fn().mockReturnValue({
              getRedirectUrl: jest.fn(() => '/app/r?l=DISCOVER_APP_LOCATOR'),
            } as unknown as LocatorPublic<DiscoverAppLocatorParams>),
          },
        },
      },
      getDataViews: async () => {
        return {
          ...dataViewPluginMocks.createStartContract(),
          create: async (spec: DataViewSpec) =>
            new DataView({ spec, fieldFormats: fieldFormatsMock }),
        };
      },
    },
    spacePrefix: '',
    dateStart: new Date().toISOString(),
    dateEnd: new Date().toISOString(),
  });

  expect(mockRuleResultService.addLastRunWarning).toHaveBeenCalledWith(topHitsWindowError);
  expect(mockRuleResultService.setLastRunOutcomeMessage).toHaveBeenCalledWith(topHitsWindowError);
});
it('should bubble up CCS errors stored in the _clusters field of the search result', async () => {
  // The rule result service mock is shared by every test in this file and the expected message
  // is identical to the one used by the `_shards` test. Clear recorded calls first so the
  // assertions below can only be satisfied by the call made in this test.
  jest.clearAllMocks();

  // Root cause nested under the skipped cluster's failure; expected to be forwarded as-is.
  const topHitsWindowError =
    "Top hits result window is too large, the top hits aggregator [topHitsAgg]'s from + size must be less than or equal to: [100] but was [300]. This limit can be set by changing the [index.max_inner_result_window] index level setting.";

  // A successful search response where the remote cluster was skipped and the reason is only
  // available under `_clusters.details`.
  const response = {
    took: 6,
    timed_out: false,
    num_reduce_phases: 0,
    _shards: { total: 0, successful: 0, skipped: 0, failed: 0 },
    _clusters: {
      total: 1,
      successful: 0,
      skipped: 1,
      running: 0,
      partial: 0,
      failed: 0,
      details: {
        test: {
          status: 'skipped',
          indices: '.kibana-event-log*',
          timed_out: false,
          failures: [
            {
              shard: -1,
              index: null,
              reason: {
                type: 'search_phase_execution_exception',
                reason: 'all shards failed',
                phase: 'query',
                grouped: true,
                failed_shards: [
                  {
                    shard: 0,
                    index: 'test:.ds-.kibana-event-log-ds-2024.07.31-000001',
                    node: 'X1aMu4BpQR-7PHi-bEI8Fw',
                    reason: {
                      type: 'illegal_argument_exception',
                      reason: topHitsWindowError,
                    },
                  },
                ],
                caused_by: {
                  type: '',
                  reason: topHitsWindowError,
                  caused_by: {
                    type: 'illegal_argument_exception',
                    reason: topHitsWindowError,
                  },
                },
              },
            },
          ],
        },
      },
    },
    hits: { total: { value: 0, relation: 'eq' }, max_score: 0, hits: [] },
  };

  (searchSourceInstanceMock.getField as jest.Mock).mockImplementationOnce(
    jest.fn().mockReturnValue(dataViewMock)
  );
  (searchSourceInstanceMock.setField as jest.Mock).mockImplementationOnce(
    jest.fn().mockReturnValue(undefined)
  );
  (searchSourceInstanceMock.createChild as jest.Mock).mockImplementationOnce(
    jest.fn().mockReturnValue(searchSourceInstanceMock)
  );
  (searchSourceInstanceMock.fetch as jest.Mock).mockImplementationOnce(
    jest.fn().mockReturnValue(response)
  );
  searchSourceCommonMock.createLazy.mockResolvedValueOnce(searchSourceInstanceMock);

  await fetchSearchSourceQuery({
    ruleId: 'abc',
    params: defaultParams,
    services: {
      logger,
      getSearchSourceClient: async () => searchSourceCommonMock,
      ruleResultService: mockRuleResultService,
      share: {
        url: {
          // @ts-expect-error
          locators: {
            get: jest.fn().mockReturnValue({
              getRedirectUrl: jest.fn(() => '/app/r?l=DISCOVER_APP_LOCATOR'),
            } as unknown as LocatorPublic<DiscoverAppLocatorParams>),
          },
        },
      },
      getDataViews: async () => {
        return {
          ...dataViewPluginMocks.createStartContract(),
          create: async (spec: DataViewSpec) =>
            new DataView({ spec, fieldFormats: fieldFormatsMock }),
        };
      },
    },
    spacePrefix: '',
    dateStart: new Date().toISOString(),
    dateEnd: new Date().toISOString(),
  });

  expect(mockRuleResultService.addLastRunWarning).toHaveBeenCalledWith(topHitsWindowError);
  expect(mockRuleResultService.setLastRunOutcomeMessage).toHaveBeenCalledWith(topHitsWindowError);
});
it('should throw user error if data view is not found', async () => {
searchSourceCommonMock.createLazy.mockImplementationOnce(() => {
throw SavedObjectsErrorHelpers.createGenericNotFoundError('index-pattern', 'abc');

View file

@ -24,9 +24,11 @@ import { SharePluginStart } from '@kbn/share-plugin/server';
import { DiscoverAppLocatorParams } from '@kbn/discover-plugin/common';
import { Logger, SavedObjectsErrorHelpers } from '@kbn/core/server';
import { LocatorPublic } from '@kbn/share-plugin/common';
import { PublicRuleResultService } from '@kbn/alerting-plugin/server/types';
import { createTaskRunError, TaskErrorSource } from '@kbn/task-manager-plugin/server';
import { OnlySearchSourceRuleParams } from '../types';
import { getComparatorScript } from '../../../../common';
import { checkForShardFailures } from '../util';
export interface FetchSearchSourceQueryOpts {
ruleId: string;
@ -39,6 +41,7 @@ export interface FetchSearchSourceQueryOpts {
getSearchSourceClient: () => Promise<ISearchStartSearchSource>;
share: SharePluginStart;
getDataViews: () => Promise<DataViewsContract>;
ruleResultService?: PublicRuleResultService;
};
dateStart: string;
dateEnd: string;
@ -54,7 +57,7 @@ export async function fetchSearchSourceQuery({
dateStart,
dateEnd,
}: FetchSearchSourceQueryOpts) {
const { logger, getSearchSourceClient } = services;
const { logger, getSearchSourceClient, ruleResultService } = services;
const searchSourceClient = await getSearchSourceClient();
const isGroupAgg = isGroupAggregation(params.termField);
const isCountAgg = isCountAggregation(params.aggType);
@ -88,6 +91,14 @@ export async function fetchSearchSourceQuery({
const searchResult = await searchSource.fetch();
// result against CCS indices will return success response with errors nested within
// the _shards or _clusters field; look for these errors and bubble them up
const anyShardFailures = checkForShardFailures(searchResult);
if (anyShardFailures && ruleResultService) {
ruleResultService.addLastRunWarning(anyShardFailures);
ruleResultService.setLastRunOutcomeMessage(anyShardFailures);
}
const link = await generateLink(
initialSearchSource,
services.share.url.locators.get<DiscoverAppLocatorParams>('DISCOVER_APP_LOCATOR')!,

View file

@ -7,7 +7,7 @@
import { OnlyEsQueryRuleParams } from './types';
import { Comparator } from '../../../common/comparator_types';
import { getParsedQuery } from './util';
import { getParsedQuery, checkForShardFailures } from './util';
describe('es_query utils', () => {
const defaultProps = {
@ -48,4 +48,209 @@ describe('es_query utils', () => {
).toThrow('invalid query specified: "{ "someProperty": "test-query" }" - query must be JSON');
});
});
// Suite title matches the function under test (`checkForShardFailures`) so failures are
// reported under a name that can be found in the code base.
describe('checkForShardFailures', () => {
  // Failure reason used by the fixtures below and expected back from the function.
  const topHitsWindowError =
    "Top hits result window is too large, the top hits aggregator [topHitsAgg]'s from + size must be less than or equal to: [100] but was [300]. This limit can be set by changing the [index.max_inner_result_window] index level setting.";

  it('should return error message if any failures in the shard response', () => {
    expect(
      checkForShardFailures({
        took: 16,
        timed_out: false,
        _shards: {
          total: 51,
          successful: 48,
          skipped: 48,
          failed: 3,
          failures: [
            {
              shard: 0,
              index: 'ccs-index',
              node: '8jMc8jz-Q6qFmKZXfijt-A',
              reason: {
                type: 'illegal_argument_exception',
                reason: topHitsWindowError,
              },
            },
          ],
        },
        _clusters: { total: 1, successful: 1, running: 0, partial: 0, failed: 0, skipped: 0 },
        hits: { total: { value: 0, relation: 'eq' }, max_score: 0, hits: [] },
      })
    ).toEqual(topHitsWindowError);
  });

  it('should return default error message if malformed error', () => {
    // Failure entry without a `reason` object.
    expect(
      checkForShardFailures({
        took: 16,
        timed_out: false,
        _shards: {
          total: 51,
          successful: 48,
          skipped: 48,
          failed: 3,
          failures: [
            // @ts-expect-error
            {
              shard: 0,
              index: 'ccs-index',
              node: '8jMc8jz-Q6qFmKZXfijt-A',
            },
          ],
        },
        _clusters: { total: 1, successful: 1, running: 0, partial: 0, failed: 0, skipped: 0 },
        hits: { total: { value: 0, relation: 'eq' }, max_score: 0, hits: [] },
      })
    ).toEqual(`Search returned partial results due to shard failures.`);

    // Non-zero failed count but an empty failures list.
    expect(
      checkForShardFailures({
        took: 16,
        timed_out: false,
        _shards: { total: 51, successful: 48, skipped: 48, failed: 3, failures: [] },
        hits: {
          total: {
            value: 0,
            relation: 'eq',
          },
          max_score: 0,
          hits: [],
        },
      })
    ).toEqual(`Search returned partial results due to shard failures.`);
  });

  it('should return error if any skipped clusters with failures', () => {
    expect(
      checkForShardFailures({
        took: 6,
        timed_out: false,
        num_reduce_phases: 0,
        _shards: { total: 0, successful: 0, skipped: 0, failed: 0 },
        _clusters: {
          total: 1,
          successful: 0,
          skipped: 1,
          running: 0,
          partial: 0,
          failed: 0,
          details: {
            test: {
              status: 'skipped',
              indices: '.kibana-event-log*',
              timed_out: false,
              failures: [
                {
                  shard: -1,
                  // @ts-expect-error
                  index: null,
                  reason: {
                    type: 'search_phase_execution_exception',
                    reason: 'all shards failed',
                    phase: 'query',
                    grouped: true,
                    failed_shards: [
                      {
                        shard: 0,
                        index: 'test:.ds-.kibana-event-log-ds-2024.07.31-000001',
                        node: 'X1aMu4BpQR-7PHi-bEI8Fw',
                        reason: {
                          type: 'illegal_argument_exception',
                          reason: topHitsWindowError,
                        },
                      },
                    ],
                    caused_by: {
                      type: '',
                      reason: topHitsWindowError,
                      caused_by: {
                        type: 'illegal_argument_exception',
                        reason: topHitsWindowError,
                      },
                    },
                  },
                },
              ],
            },
          },
        },
        hits: { total: { value: 0, relation: 'eq' }, max_score: 0, hits: [] },
      })
    ).toEqual(topHitsWindowError);
  });

  it('should return default error message if malformed skipped cluster error', () => {
    // Skipped cluster with an empty failures list.
    expect(
      checkForShardFailures({
        took: 6,
        timed_out: false,
        num_reduce_phases: 0,
        _shards: { total: 0, successful: 0, skipped: 0, failed: 0 },
        _clusters: {
          total: 1,
          successful: 0,
          skipped: 1,
          running: 0,
          partial: 0,
          failed: 0,
          details: {
            test: {
              status: 'skipped',
              indices: '.kibana-event-log*',
              timed_out: false,
              failures: [],
            },
          },
        },
        hits: { total: { value: 0, relation: 'eq' }, max_score: 0, hits: [] },
      })
    ).toEqual(`Search returned partial results due to skipped cluster errors.`);

    // Skipped cluster whose failure entry carries no reason.
    expect(
      checkForShardFailures({
        took: 6,
        timed_out: false,
        num_reduce_phases: 0,
        _shards: { total: 0, successful: 0, skipped: 0, failed: 0 },
        _clusters: {
          total: 1,
          successful: 0,
          skipped: 1,
          running: 0,
          partial: 0,
          failed: 0,
          details: {
            test: {
              status: 'skipped',
              indices: '.kibana-event-log*',
              timed_out: false,
              // @ts-expect-error
              failures: [{ shard: -1 }],
            },
          },
        },
        hits: { total: { value: 0, relation: 'eq' }, max_score: 0, hits: [] },
      })
    ).toEqual(`Search returned partial results due to skipped cluster errors.`);
  });

  it('should return undefined if no failures', () => {
    expect(
      checkForShardFailures({
        took: 16,
        timed_out: false,
        _shards: { total: 51, successful: 51, skipped: 51, failed: 0, failures: [] },
        _clusters: { total: 1, successful: 1, running: 0, partial: 0, failed: 0, skipped: 0 },
        hits: { total: { value: 0, relation: 'eq' }, max_score: 0, hits: [] },
      })
    ).toBeUndefined();
  });
});
});

View file

@ -6,6 +6,7 @@
*/
import { i18n } from '@kbn/i18n';
import { SearchResponse } from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import { OnlyEsQueryRuleParams } from './types';
import { EsQueryRuleParams } from './rule_type_params';
@ -46,3 +47,24 @@ function getInvalidQueryError(query: string) {
},
});
}
/**
 * Inspects a search response for failures that are reported inside an otherwise successful
 * result, i.e. under `_shards` or `_clusters`.
 *
 * Shard failures take precedence: when `_shards.failed` is greater than zero the reason of the
 * first entry in `_shards.failures` is returned. Otherwise, when `_clusters.skipped` is non-zero,
 * the per-cluster details are scanned and the first available `caused_by` reason is returned.
 *
 * @param searchResult the search response to inspect
 * @returns the most specific failure message found, a generic message when a failure is
 *   indicated but no reason can be extracted, or `undefined` when nothing indicates a failure
 *   (including skipped clusters for which the response carries no per-cluster details)
 */
export function checkForShardFailures(searchResult: SearchResponse<unknown>): string | undefined {
  const failedShardCount = searchResult?._shards?.failed ?? 0;
  if (failedShardCount > 0) {
    // `||` on purpose: an empty reason string is as unhelpful as a missing one
    return (
      searchResult?._shards?.failures?.[0]?.reason?.reason ||
      'Search returned partial results due to shard failures.'
    );
  }

  const skippedClusterCount = searchResult?._clusters?.skipped ?? 0;
  if (!skippedClusterCount) {
    return undefined;
  }

  const clusterDetails = Object.values(searchResult?._clusters?.details ?? {});
  if (clusterDetails.length === 0) {
    // nothing to report on without per-cluster details
    return undefined;
  }

  // Look at every cluster rather than only the first one listed: a cluster without failures may
  // precede the skipped cluster that actually carries the root cause.
  for (const detail of clusterDetails) {
    const rootCause = detail?.failures?.[0]?.reason?.caused_by?.reason;
    if (rootCause) {
      return rootCause;
    }
  }

  return 'Search returned partial results due to skipped cluster errors.';
}