[Response Ops][Alerting] Switch to logger.warn for no_shard_available_action_exception encountered when collecting telemetry (#205520)

Resolves https://github.com/elastic/response-ops-team/issues/221

## Summary

Looking through the logs, the majority of these errors share the
following stack trace (14,000+ occurrences in the last 7 days):

```
ResponseError: search_phase_execution_exception
	Root causes:
		no_shard_available_action_exception: [es-es-search-686bf6b747-xldl8][100.65.77.183:9300][indices:data/read/search[phase/query]]
		no_shard_available_action_exception: null
    at KibanaTransport._request (/usr/share/kibana/node_modules/@elastic/transport/lib/Transport.js:543:27)
    at processTicksAndRejections (node:internal/process/task_queues:95:5)
    at /usr/share/kibana/node_modules/@elastic/transport/lib/Transport.js:641:32
    at KibanaTransport.request (/usr/share/kibana/node_modules/@elastic/transport/lib/Transport.js:637:20)
    at KibanaTransport.request (/usr/share/kibana/node_modules/@kbn/core-elasticsearch-client-server-internal/src/create_transport.js:60:16)
    at ClientTraced.SearchApi [as search] (/usr/share/kibana/node_modules/@elastic/elasticsearch/lib/api/api/search.js:72:12)
    at getTotalAlertsCountAggregations (/usr/share/kibana/node_modules/@kbn/alerting-plugin/server/usage/lib/get_telemetry_from_alerts.js:42:21)
    at async Promise.all (index 6)
    at TaskManagerRunner.run (/usr/share/kibana/node_modules/@kbn/task-manager-plugin/server/task_running/task_runner.js:325:22)
```

Looking through the code, we already catch these errors and return a
default response for that telemetry object. The
`no_shard_available_action_exception` is not an issue with Kibana but an
ES issue, so this PR catches these errors and logs them at the debug
level instead of the warn level to avoid polluting the logs with errors
we have no control over.
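
The handling lives in a small helper (`parse_and_log_error.ts`), added separately for the actions and alerting plugins in the diff below. A condensed sketch of the alerting variant, slightly simplified from the committed code:

```ts
import { Logger } from '@kbn/core/server';

// Condensed from the parseAndLogError helper added in this PR: shard-availability
// errors are downgraded to debug, everything else keeps logging at warn.
export function parseAndLogError(err: Error, errType: string, logger: Logger): string {
  const logMessage = `Error executing alerting telemetry task: ${errType} - ${err}`;
  const logOptions = {
    tags: ['alerting', 'telemetry-failed'],
    error: { stack_trace: err.stack },
  };

  if (JSON.stringify(err).includes('no_shard_available_action_exception')) {
    // The exception text is wordy and the returned message ends up in the task
    // state, so collapse it to the exception type and only log at debug.
    if (logger.isLevelEnabled('debug')) {
      logger.debug(logMessage, logOptions);
    }
    return 'no_shard_available_action_exception';
  }

  logger.warn(logMessage, logOptions);
  return err.message ? err.message : err.toString();
}
```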

Excluding those results, we see a different, less frequent stack trace
(100+ occurrences in the last 15 days):

```
TypeError: Cannot read properties of undefined (reading 'by_rule_type_id')
    at getTotalAlertsCountAggregations (/usr/share/kibana/node_modules/@kbn/alerting-plugin/server/usage/lib/get_telemetry_from_alerts.js:49:109)
    at processTicksAndRejections (node:internal/process/task_queues:95:5)
    at async Promise.all (index 6)
    at TaskManagerRunner.run (/usr/share/kibana/node_modules/@kbn/task-manager-plugin/server/task_running/task_runner.js:325:22)
```
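
This `TypeError` comes from dereferencing `aggregations.by_rule_type_id` when the search response carries no aggregations at all; the diff below guards that access with optional chaining. A minimal, illustrative-only sketch of the pattern (the helper and types here are not part of the PR):

```ts
interface RuleTypeBucket {
  key: string;
  doc_count: number;
}

interface AlertAggregations {
  by_rule_type_id?: { buckets: RuleTypeBucket[] };
}

// Illustration of the guard added in the diff below: optional chaining turns a
// missing aggregations object into undefined instead of throwing
// "Cannot read properties of undefined (reading 'by_rule_type_id')".
function countAlertsByRuleType(aggregations?: AlertAggregations): Record<string, number> {
  const buckets = aggregations?.by_rule_type_id?.buckets ?? [];
  return Object.fromEntries(buckets.map((bucket) => [bucket.key, bucket.doc_count]));
}
```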

For actions telemetry, the volume of these errors is much lower.
Unfortunately, we were not logging the stack trace for these errors, so
the source is harder to track down. I've updated the code to include the
stack trace in the log meta and added the same handling for
`no_shard_available_action_exception`, on the assumption that actions
telemetry could also be hitting these errors.
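
The updated tests pin both behaviors down by collecting the mocked logger calls and asserting on the message and the structured meta. A condensed Jest-style sketch using the same mocks as the diff below (the surrounding test setup is elided):

```ts
import { loggerMock } from '@kbn/logging-mocks';
import { loggingSystemMock } from '@kbn/core/server/mocks';

const logger = loggerMock.create();
// ...invoke the telemetry collector so the mocked ES client rejects and the catch block runs...

// Condensed from the test changes below: the warn call should carry the telemetry
// tags and a populated stack trace in its structured meta argument.
const loggerCalls = loggingSystemMock.collect(logger);
expect(loggerCalls.warn[0][0]).toEqual(
  'Error executing actions telemetry task: getTotalCount - Error: oh no'
);
expect(loggerCalls.warn[0][1]?.tags).toEqual(['actions', 'telemetry-failed']);
expect(loggerCalls.warn[0][1]?.error?.stack_trace).toBeDefined();
```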

---------

Co-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com>
Ying Mao 2025-02-03 09:44:03 -05:00 committed by GitHub
parent 504510b92b
commit 4e0c0a785a
12 changed files with 823 additions and 378 deletions

@ -6,7 +6,9 @@
*/
import { elasticsearchClientMock } from '@kbn/core-elasticsearch-client-server-mocks';
import { MockedLogger, loggerMock } from '@kbn/logging-mocks';
import { loggingSystemMock } from '@kbn/core/server/mocks';
import { errors } from '@elastic/elasticsearch';
import {
getCounts,
getExecutionsPerDayCount,
@ -14,9 +16,13 @@ import {
getTotalCount,
} from './actions_telemetry';
const mockLogger = loggingSystemMock.create().get();
let logger: MockedLogger;
describe('actions telemetry', () => {
beforeEach(() => {
logger = loggerMock.create();
});
test('getTotalCount should replace first symbol . to __ for action types names', async () => {
const mockEsClient = elasticsearchClientMock.createClusterClient().asScoped().asInternalUser;
mockEsClient.search.mockResponse(
@ -107,7 +113,7 @@ describe('actions telemetry', () => {
},
}
);
const telemetry = await getTotalCount(mockEsClient, 'test', mockLogger);
const telemetry = await getTotalCount(mockEsClient, 'test', logger);
expect(mockEsClient.search).toHaveBeenCalledTimes(1);
@ -126,16 +132,24 @@ describe('actions telemetry', () => {
`);
});
test('getTotalCount should return empty results if query throws error', async () => {
test('getTotalCount should return empty results and log warning if query throws error', async () => {
const mockEsClient = elasticsearchClientMock.createClusterClient().asScoped().asInternalUser;
mockEsClient.search.mockRejectedValue(new Error('oh no'));
const telemetry = await getTotalCount(mockEsClient, 'test', mockLogger);
const telemetry = await getTotalCount(mockEsClient, 'test', logger);
expect(mockEsClient.search).toHaveBeenCalledTimes(1);
expect(mockLogger.warn).toHaveBeenCalledWith(
`Error executing actions telemetry task: getTotalCount - {}`
const loggerCalls = loggingSystemMock.collect(logger);
expect(loggerCalls.debug).toHaveLength(0);
expect(loggerCalls.warn).toHaveLength(1);
expect(loggerCalls.warn[0][0]).toEqual(
`Error executing actions telemetry task: getTotalCount - Error: oh no`
);
// logger meta
expect(loggerCalls.warn[0][1]?.tags).toEqual(['actions', 'telemetry-failed']);
expect(loggerCalls.warn[0][1]?.error?.stack_trace).toBeDefined();
expect(telemetry).toMatchInlineSnapshot(`
Object {
"countByType": Object {},
@ -147,6 +161,60 @@ describe('actions telemetry', () => {
`);
});
test('getTotalCount should return empty results and log debug if query throws search_phase_execution_exception error', async () => {
const mockEsClient = elasticsearchClientMock.createClusterClient().asScoped().asInternalUser;
mockEsClient.search.mockRejectedValueOnce(
new errors.ResponseError({
warnings: [],
// eslint-disable-next-line @typescript-eslint/no-explicit-any
meta: {} as any,
body: {
error: {
root_cause: [],
type: 'search_phase_execution_exception',
reason: 'no_shard_available_action_exception',
phase: 'fetch',
grouped: true,
failed_shards: [],
caused_by: {
type: 'no_shard_available_action_exception',
reason: 'This is the nested reason',
},
},
},
statusCode: 503,
headers: {},
})
);
const telemetry = await getTotalCount(mockEsClient, 'test', logger);
expect(mockEsClient.search).toHaveBeenCalledTimes(1);
const loggerCalls = loggingSystemMock.collect(logger);
expect(loggerCalls.debug).toHaveLength(1);
expect(loggerCalls.debug[0][0]).toMatchInlineSnapshot(`
"Error executing actions telemetry task: getTotalCount - ResponseError: search_phase_execution_exception
Caused by:
no_shard_available_action_exception: This is the nested reason"
`);
// logger meta
expect(loggerCalls.debug[0][1]?.tags).toEqual(['actions', 'telemetry-failed']);
expect(loggerCalls.debug[0][1]?.error?.stack_trace).toBeDefined();
expect(loggerCalls.warn).toHaveLength(0);
expect(telemetry).toMatchInlineSnapshot(`
Object {
"countByType": Object {},
"countGenAiProviderTypes": Object {},
"countTotal": 0,
"errorMessage": "no_shard_available_action_exception",
"hasErrors": true,
}
`);
});
test('getInUseTotalCount', async () => {
const mockEsClient = elasticsearchClientMock.createClusterClient().asScoped().asInternalUser;
mockEsClient.search.mockResponseOnce(
@ -202,7 +270,7 @@ describe('actions telemetry', () => {
],
},
});
const telemetry = await getInUseTotalCount(mockEsClient, 'test', mockLogger);
const telemetry = await getInUseTotalCount(mockEsClient, 'test', logger);
expect(mockEsClient.search).toHaveBeenCalledTimes(2);
expect(telemetry).toMatchInlineSnapshot(`
@ -287,7 +355,7 @@ describe('actions telemetry', () => {
],
},
});
const telemetry = await getInUseTotalCount(mockEsClient, 'test', mockLogger, undefined, [
const telemetry = await getInUseTotalCount(mockEsClient, 'test', logger, undefined, [
{
id: 'test',
actionTypeId: '.email',
@ -332,16 +400,23 @@ describe('actions telemetry', () => {
`);
});
test('getInUseTotalCount should return empty results if query throws error', async () => {
test('getInUseTotalCount should return empty results and log warning if query throws error', async () => {
const mockEsClient = elasticsearchClientMock.createClusterClient().asScoped().asInternalUser;
mockEsClient.search.mockRejectedValue(new Error('oh no'));
const telemetry = await getInUseTotalCount(mockEsClient, 'test', mockLogger);
const telemetry = await getInUseTotalCount(mockEsClient, 'test', logger);
expect(mockEsClient.search).toHaveBeenCalledTimes(1);
expect(mockLogger.warn).toHaveBeenCalledWith(
`Error executing actions telemetry task: getInUseTotalCount - {}`
const loggerCalls = loggingSystemMock.collect(logger);
expect(loggerCalls.debug).toHaveLength(0);
expect(loggerCalls.warn).toHaveLength(1);
expect(loggerCalls.warn[0][0]).toEqual(
`Error executing actions telemetry task: getInUseTotalCount - Error: oh no`
);
// logger meta
expect(loggerCalls.warn[0][1]?.tags).toEqual(['actions', 'telemetry-failed']);
expect(loggerCalls.warn[0][1]?.error?.stack_trace).toBeDefined();
expect(telemetry).toMatchInlineSnapshot(`
Object {
"countByAlertHistoryConnectorType": 0,
@ -355,6 +430,62 @@ describe('actions telemetry', () => {
`);
});
test('getInUseTotalCount should return empty results and log debug if query throws search_phase_execution_exception error', async () => {
const mockEsClient = elasticsearchClientMock.createClusterClient().asScoped().asInternalUser;
mockEsClient.search.mockRejectedValueOnce(
new errors.ResponseError({
warnings: [],
// eslint-disable-next-line @typescript-eslint/no-explicit-any
meta: {} as any,
body: {
error: {
root_cause: [],
type: 'search_phase_execution_exception',
reason: 'no_shard_available_action_exception',
phase: 'fetch',
grouped: true,
failed_shards: [],
caused_by: {
type: 'no_shard_available_action_exception',
reason: 'This is the nested reason',
},
},
},
statusCode: 503,
headers: {},
})
);
const telemetry = await getInUseTotalCount(mockEsClient, 'test', logger);
expect(mockEsClient.search).toHaveBeenCalledTimes(1);
const loggerCalls = loggingSystemMock.collect(logger);
expect(loggerCalls.debug).toHaveLength(1);
expect(loggerCalls.debug[0][0]).toMatchInlineSnapshot(`
"Error executing actions telemetry task: getInUseTotalCount - ResponseError: search_phase_execution_exception
Caused by:
no_shard_available_action_exception: This is the nested reason"
`);
// logger meta
expect(loggerCalls.debug[0][1]?.tags).toEqual(['actions', 'telemetry-failed']);
expect(loggerCalls.debug[0][1]?.error?.stack_trace).toBeDefined();
expect(loggerCalls.warn).toHaveLength(0);
expect(telemetry).toMatchInlineSnapshot(`
Object {
"countByAlertHistoryConnectorType": 0,
"countByType": Object {},
"countEmailByService": Object {},
"countNamespaces": 0,
"countTotal": 0,
"errorMessage": "no_shard_available_action_exception",
"hasErrors": true,
}
`);
});
test('getTotalCount accounts for preconfigured connectors', async () => {
const mockEsClient = elasticsearchClientMock.createClusterClient().asScoped().asInternalUser;
mockEsClient.search.mockResponse(
@ -445,7 +576,7 @@ describe('actions telemetry', () => {
},
}
);
const telemetry = await getTotalCount(mockEsClient, 'test', mockLogger, [
const telemetry = await getTotalCount(mockEsClient, 'test', logger, [
{
id: 'test',
actionTypeId: '.test',
@ -501,7 +632,7 @@ describe('actions telemetry', () => {
},
}
);
const telemetry = await getTotalCount(mockEsClient, 'test', mockLogger, [
const telemetry = await getTotalCount(mockEsClient, 'test', logger, [
{
id: 'system_action:system-connector-test.system-action',
actionTypeId: 'test.system-action',
@ -615,7 +746,7 @@ describe('actions telemetry', () => {
],
},
});
const telemetry = await getInUseTotalCount(mockEsClient, 'test', mockLogger, undefined, [
const telemetry = await getInUseTotalCount(mockEsClient, 'test', logger, undefined, [
{
id: 'anotherServerLog',
actionTypeId: '.server-log',
@ -721,7 +852,7 @@ describe('actions telemetry', () => {
},
});
const telemetry = await getInUseTotalCount(mockEsClient, 'test', mockLogger, undefined, []);
const telemetry = await getInUseTotalCount(mockEsClient, 'test', logger, undefined, []);
expect(mockEsClient.search).toHaveBeenCalledTimes(2);
expect(telemetry).toMatchInlineSnapshot(`
@ -827,7 +958,7 @@ describe('actions telemetry', () => {
],
},
});
const telemetry = await getInUseTotalCount(mockEsClient, 'test', mockLogger);
const telemetry = await getInUseTotalCount(mockEsClient, 'test', logger);
expect(mockEsClient.search).toHaveBeenCalledTimes(2);
expect(telemetry).toMatchInlineSnapshot(`
@ -961,7 +1092,7 @@ describe('actions telemetry', () => {
},
}
);
const telemetry = await getExecutionsPerDayCount(mockEsClient, 'test', mockLogger);
const telemetry = await getExecutionsPerDayCount(mockEsClient, 'test', logger);
expect(mockEsClient.search).toHaveBeenCalledTimes(1);
expect(telemetry).toStrictEqual({
@ -995,16 +1126,24 @@ describe('actions telemetry', () => {
});
});
test('getExecutionsPerDayCount should return empty results if query throws error', async () => {
test('getExecutionsPerDayCount should return empty results and log warning if query throws error', async () => {
const mockEsClient = elasticsearchClientMock.createClusterClient().asScoped().asInternalUser;
mockEsClient.search.mockRejectedValue(new Error('oh no'));
const telemetry = await getExecutionsPerDayCount(mockEsClient, 'test', mockLogger);
const telemetry = await getExecutionsPerDayCount(mockEsClient, 'test', logger);
expect(mockEsClient.search).toHaveBeenCalledTimes(1);
expect(mockLogger.warn).toHaveBeenCalledWith(
`Error executing actions telemetry task: getExecutionsPerDayCount - {}`
const loggerCalls = loggingSystemMock.collect(logger);
expect(loggerCalls.debug).toHaveLength(0);
expect(loggerCalls.warn).toHaveLength(1);
expect(loggerCalls.warn[0][0]).toEqual(
`Error executing actions telemetry task: getExecutionsPerDayCount - Error: oh no`
);
// logger meta
expect(loggerCalls.warn[0][1]?.tags).toEqual(['actions', 'telemetry-failed']);
expect(loggerCalls.warn[0][1]?.error?.stack_trace).toBeDefined();
expect(telemetry).toMatchInlineSnapshot(`
Object {
"avgExecutionTime": 0,
@ -1020,6 +1159,64 @@ describe('actions telemetry', () => {
`);
});
test('getExecutionsPerDayCount should return empty results and log debug if query throws search_phase_execution_exception error', async () => {
const mockEsClient = elasticsearchClientMock.createClusterClient().asScoped().asInternalUser;
mockEsClient.search.mockRejectedValueOnce(
new errors.ResponseError({
warnings: [],
// eslint-disable-next-line @typescript-eslint/no-explicit-any
meta: {} as any,
body: {
error: {
root_cause: [],
type: 'search_phase_execution_exception',
reason: 'no_shard_available_action_exception',
phase: 'fetch',
grouped: true,
failed_shards: [],
caused_by: {
type: 'no_shard_available_action_exception',
reason: 'This is the nested reason',
},
},
},
statusCode: 503,
headers: {},
})
);
const telemetry = await getExecutionsPerDayCount(mockEsClient, 'test', logger);
expect(mockEsClient.search).toHaveBeenCalledTimes(1);
const loggerCalls = loggingSystemMock.collect(logger);
expect(loggerCalls.debug).toHaveLength(1);
expect(loggerCalls.debug[0][0]).toMatchInlineSnapshot(`
"Error executing actions telemetry task: getExecutionsPerDayCount - ResponseError: search_phase_execution_exception
Caused by:
no_shard_available_action_exception: This is the nested reason"
`);
// logger meta
expect(loggerCalls.debug[0][1]?.tags).toEqual(['actions', 'telemetry-failed']);
expect(loggerCalls.debug[0][1]?.error?.stack_trace).toBeDefined();
expect(loggerCalls.warn).toHaveLength(0);
expect(telemetry).toMatchInlineSnapshot(`
Object {
"avgExecutionTime": 0,
"avgExecutionTimeByType": Object {},
"countByType": Object {},
"countFailed": 0,
"countFailedByType": Object {},
"countRunOutcomeByConnectorType": Object {},
"countTotal": 0,
"errorMessage": "no_shard_available_action_exception",
"hasErrors": true,
}
`);
});
it('getCounts', () => {
const aggs = {
'.d3security': 2,

@ -20,6 +20,7 @@ import {
getActionExecutions,
getActionsCount,
} from './lib/actions_telemetry_util';
import { parseAndLogError } from './lib/parse_and_log_error';
export interface InMemoryAggRes {
total: number;
@ -108,9 +109,7 @@ export async function getTotalCount(
countGenAiProviderTypes,
};
} catch (err) {
const errorMessage = err && err.message ? err.message : err.toString();
logger.warn(`Error executing actions telemetry task: getTotalCount - ${JSON.stringify(err)}`);
const errorMessage = parseAndLogError(err, `getTotalCount`, logger);
return {
hasErrors: true,
@ -387,11 +386,8 @@ export async function getInUseTotalCount(
countNamespaces: namespacesList.size,
};
} catch (err) {
const errorMessage = err && err.message ? err.message : err.toString();
const errorMessage = parseAndLogError(err, `getInUseTotalCount`, logger);
logger.warn(
`Error executing actions telemetry task: getInUseTotalCount - ${JSON.stringify(err)}`
);
return {
hasErrors: true,
errorMessage,
@ -657,10 +653,8 @@ export async function getExecutionsPerDayCount(
),
};
} catch (err) {
const errorMessage = err && err.message ? err.message : err.toString();
logger.warn(
`Error executing actions telemetry task: getExecutionsPerDayCount - ${JSON.stringify(err)}`
);
const errorMessage = parseAndLogError(err, `getExecutionsPerDayCount`, logger);
return {
hasErrors: true,
errorMessage,

@ -0,0 +1,34 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { Logger } from '@kbn/core/server';
export function parseAndLogError(err: Error, errType: string, logger: Logger): string {
const errorMessage = err && err.message ? err.message : err.toString();
let returnedErrorMessage = errorMessage;
const errorStr = JSON.stringify(err);
const logMessage = `Error executing actions telemetry task: ${errType} - ${err}`;
const logOptions = {
tags: ['actions', 'telemetry-failed'],
error: { stack_trace: err.stack },
};
// If error string contains "no_shard_available_action_exception", debug log it
if (errorStr.includes('no_shard_available_action_exception')) {
// the no_shard_available_action_exception can be wordy and the error message returned from this function
// gets stored in the task state so lets simplify
returnedErrorMessage = 'no_shard_available_action_exception';
if (logger.isLevelEnabled('debug')) {
logger.debug(logMessage, logOptions);
}
} else {
logger.warn(logMessage, logOptions);
}
return returnedErrorMessage;
}

@ -6,64 +6,50 @@
*/
import { elasticsearchServiceMock, loggingSystemMock } from '@kbn/core/server/mocks';
import { MockedLogger, loggerMock } from '@kbn/logging-mocks';
import { getTotalAlertsCountAggregations } from './get_telemetry_from_alerts';
import { errors } from '@elastic/elasticsearch';
const elasticsearch = elasticsearchServiceMock.createStart();
const esClient = elasticsearch.client.asInternalUser;
const logger: ReturnType<typeof loggingSystemMock.createLogger> = loggingSystemMock.createLogger();
let logger: MockedLogger;
describe('kibana index telemetry', () => {
beforeEach(() => {
jest.resetAllMocks();
logger = loggerMock.create();
});
it('should return total alert couts and alert counts by rule type id', async () => {
it('should return total alert counts and alert counts by rule type id', async () => {
esClient.search.mockResponseOnce({
took: 4,
timed_out: false,
_shards: {
total: 1,
successful: 1,
skipped: 0,
failed: 0,
},
hits: {
total: {
value: 6,
relation: 'eq',
},
max_score: null,
hits: [],
},
_shards: { total: 1, successful: 1, skipped: 0, failed: 0 },
hits: { total: { value: 6, relation: 'eq' }, max_score: null, hits: [] },
aggregations: {
by_rule_type_id: {
doc_count_error_upper_bound: 0,
sum_other_doc_count: 0,
buckets: [
{
key: '.index-threshold',
doc_count: 1,
},
{
key: 'logs.alert.document.count',
doc_count: 2,
},
{
key: 'document.test.',
doc_count: 3,
},
{ key: '.index-threshold', doc_count: 1 },
{ key: 'logs.alert.document.count', doc_count: 2 },
{ key: 'document.test.', doc_count: 3 },
],
},
},
});
const telemetry = await getTotalAlertsCountAggregations({
esClient,
logger,
});
const telemetry = await getTotalAlertsCountAggregations({ esClient, logger });
expect(esClient.search).toHaveBeenCalledTimes(1);
expect(logger.debug).toHaveBeenCalledTimes(2);
const debugLogs = loggingSystemMock.collect(logger).debug;
expect(debugLogs).toHaveLength(2);
expect(debugLogs[0][0]).toEqual(
`query for getTotalAlertsCountAggregations - {\"index\":\".alerts-*\",\"size\":0,\"body\":{\"query\":{\"match_all\":{}},\"aggs\":{\"by_rule_type_id\":{\"terms\":{\"field\":\"kibana.alert.rule.rule_type_id\",\"size\":33}}}}}`
);
expect(debugLogs[1][0]).toEqual(
`results for getTotalAlertsCountAggregations query - {\"took\":4,\"timed_out\":false,\"_shards\":{\"total\":1,\"successful\":1,\"skipped\":0,\"failed\":0},\"hits\":{\"total\":{\"value\":6,\"relation\":\"eq\"},\"max_score\":null,\"hits\":[]},\"aggregations\":{\"by_rule_type_id\":{\"doc_count_error_upper_bound\":0,\"sum_other_doc_count\":0,\"buckets\":[{\"key\":\".index-threshold\",\"doc_count\":1},{\"key\":\"logs.alert.document.count\",\"doc_count\":2},{\"key\":\"document.test.\",\"doc_count\":3}]}}}`
);
expect(telemetry).toEqual({
hasErrors: false,
@ -77,37 +63,18 @@ describe('kibana index telemetry', () => {
});
});
it('should return ', async () => {
it('should return on empty results', async () => {
esClient.search.mockResponseOnce({
took: 4,
timed_out: false,
_shards: {
total: 1,
successful: 1,
skipped: 0,
failed: 0,
},
hits: {
total: {
value: 0,
relation: 'eq',
},
max_score: null,
hits: [],
},
_shards: { total: 1, successful: 1, skipped: 0, failed: 0 },
hits: { total: { value: 0, relation: 'eq' }, max_score: null, hits: [] },
aggregations: {
by_rule_type_id: {
doc_count_error_upper_bound: 0,
sum_other_doc_count: 0,
buckets: [],
},
by_rule_type_id: { doc_count_error_upper_bound: 0, sum_other_doc_count: 0, buckets: [] },
},
});
const telemetry = await getTotalAlertsCountAggregations({
esClient,
logger,
});
const telemetry = await getTotalAlertsCountAggregations({ esClient, logger });
expect(telemetry).toEqual({
hasErrors: false,
@ -116,17 +83,25 @@ describe('kibana index telemetry', () => {
});
});
test('should return empty results and log warning if query throws error', async () => {
it('should return empty results and log warning if query throws error', async () => {
esClient.search.mockRejectedValueOnce(new Error('test'));
const telemetry = await getTotalAlertsCountAggregations({
esClient,
logger,
});
const telemetry = await getTotalAlertsCountAggregations({ esClient, logger });
expect(esClient.search).toHaveBeenCalledTimes(1);
expect(logger.debug).toHaveBeenCalledTimes(1);
expect(logger.warn).toHaveBeenCalledTimes(1);
const loggerCalls = loggingSystemMock.collect(logger);
expect(loggerCalls.debug).toHaveLength(1);
expect(loggerCalls.debug[0][0]).toEqual(
`query for getTotalAlertsCountAggregations - {\"index\":\".alerts-*\",\"size\":0,\"body\":{\"query\":{\"match_all\":{}},\"aggs\":{\"by_rule_type_id\":{\"terms\":{\"field\":\"kibana.alert.rule.rule_type_id\",\"size\":33}}}}}`
);
expect(loggerCalls.warn).toHaveLength(1);
expect(loggerCalls.warn[0][0]).toEqual(
`Error executing alerting telemetry task: getTotalAlertsCountAggregations - Error: test`
);
// logger meta
expect(loggerCalls.warn[0][1]?.tags).toEqual(['alerting', 'telemetry-failed']);
expect(loggerCalls.warn[0][1]?.error?.stack_trace).toBeDefined();
expect(telemetry).toEqual({
hasErrors: true,
@ -135,4 +110,56 @@ describe('kibana index telemetry', () => {
count_alerts_by_rule_type: {},
});
});
it('should return empty results and log debug log if query throws search_phase_execution_exception error', async () => {
esClient.search.mockRejectedValueOnce(
new errors.ResponseError({
warnings: [],
// eslint-disable-next-line @typescript-eslint/no-explicit-any
meta: {} as any,
body: {
error: {
root_cause: [],
type: 'search_phase_execution_exception',
reason: 'no_shard_available_action_exception',
phase: 'fetch',
grouped: true,
failed_shards: [],
caused_by: {
type: 'no_shard_available_action_exception',
reason: 'This is the nested reason',
},
},
},
statusCode: 503,
headers: {},
})
);
const telemetry = await getTotalAlertsCountAggregations({ esClient, logger });
expect(esClient.search).toHaveBeenCalledTimes(1);
const loggerCalls = loggingSystemMock.collect(logger);
expect(loggerCalls.debug).toHaveLength(2);
expect(loggerCalls.debug[0][0]).toEqual(
`query for getTotalAlertsCountAggregations - {\"index\":\".alerts-*\",\"size\":0,\"body\":{\"query\":{\"match_all\":{}},\"aggs\":{\"by_rule_type_id\":{\"terms\":{\"field\":\"kibana.alert.rule.rule_type_id\",\"size\":33}}}}}`
);
expect(loggerCalls.debug[1][0]).toMatchInlineSnapshot(`
"Error executing alerting telemetry task: getTotalAlertsCountAggregations - ResponseError: search_phase_execution_exception
Caused by:
no_shard_available_action_exception: This is the nested reason"
`);
// logger meta
expect(loggerCalls.debug[1][1]?.tags).toEqual(['alerting', 'telemetry-failed']);
expect(loggerCalls.debug[1][1]?.error?.stack_trace).toBeDefined();
expect(loggerCalls.warn).toHaveLength(0);
expect(telemetry).toEqual({
hasErrors: true,
errorMessage: `no_shard_available_action_exception`,
count_alerts_total: 0,
count_alerts_by_rule_type: {},
});
});
});

@ -14,6 +14,7 @@ import { ElasticsearchClient, Logger } from '@kbn/core/server';
import { NUM_ALERTING_RULE_TYPES } from '../alerting_usage_collector';
import { parseSimpleRuleTypeBucket } from './parse_simple_rule_type_bucket';
import { AlertingUsage } from '../types';
import { parseAndLogError } from './parse_and_log_error';
interface Opts {
esClient: ElasticsearchClient;
@ -69,20 +70,10 @@ export async function getTotalAlertsCountAggregations({
return {
hasErrors: false,
count_alerts_total: totalAlertsCount ?? 0,
count_alerts_by_rule_type: parseSimpleRuleTypeBucket(aggregations.by_rule_type_id.buckets),
count_alerts_by_rule_type: parseSimpleRuleTypeBucket(aggregations?.by_rule_type_id?.buckets),
};
} catch (err) {
const errorMessage = err && err.message ? err.message : err.toString();
logger.warn(
`Error executing alerting telemetry task: getTotalAlertsCountAggregations - ${JSON.stringify(
err
)}`,
{
tags: ['alerting', 'telemetry-failed'],
error: { stack_trace: err.stack },
}
);
const errorMessage = parseAndLogError(err, `getTotalAlertsCountAggregations`, logger);
return {
hasErrors: true,

@ -6,6 +6,8 @@
*/
import { elasticsearchServiceMock, loggingSystemMock } from '@kbn/core/server/mocks';
import { MockedLogger, loggerMock } from '@kbn/logging-mocks';
import { errors } from '@elastic/elasticsearch';
import {
getExecutionsPerDayCount,
parseExecutionFailureByRuleType,
@ -17,11 +19,12 @@ import {
const elasticsearch = elasticsearchServiceMock.createStart();
const esClient = elasticsearch.client.asInternalUser;
const logger: ReturnType<typeof loggingSystemMock.createLogger> = loggingSystemMock.createLogger();
let logger: MockedLogger;
describe('event log telemetry', () => {
beforeEach(() => {
jest.resetAllMocks();
logger = loggerMock.create();
});
describe('parseRuleTypeBucket', () => {
@ -1106,20 +1109,8 @@ describe('event log telemetry', () => {
esClient.search.mockResponse({
took: 4,
timed_out: false,
_shards: {
total: 1,
successful: 1,
skipped: 0,
failed: 0,
},
hits: {
total: {
value: 148,
relation: 'eq',
},
max_score: null,
hits: [],
},
_shards: { total: 1, successful: 1, skipped: 0, failed: 0 },
hits: { total: { value: 148, relation: 'eq' }, max_score: null, hits: [] },
aggregations: {
by_rule_type_id: {
doc_count_error_upper_bound: 0,
@ -1302,11 +1293,7 @@ describe('event log telemetry', () => {
},
});
const telemetry = await getExecutionsPerDayCount({
esClient,
eventLogIndex: 'test',
logger,
});
const telemetry = await getExecutionsPerDayCount({ esClient, eventLogIndex: 'test', logger });
expect(esClient.search).toHaveBeenCalledTimes(1);
@ -1416,17 +1403,14 @@ describe('event log telemetry', () => {
test('should return empty results and log warning if query throws error', async () => {
esClient.search.mockRejectedValue(new Error('oh no'));
const telemetry = await getExecutionsPerDayCount({
esClient,
eventLogIndex: 'test',
logger,
});
const telemetry = await getExecutionsPerDayCount({ esClient, eventLogIndex: 'test', logger });
expect(esClient.search).toHaveBeenCalledTimes(1);
const loggerCall = logger.warn.mock.calls[0][0];
const loggerMeta = logger.warn.mock.calls[0][1];
expect(loggerCall as string).toMatchInlineSnapshot(
`"Error executing alerting telemetry task: getExecutionsPerDayCount - {}"`
`"Error executing alerting telemetry task: getExecutionsPerDayCount - Error: oh no"`
);
expect(loggerMeta?.tags).toEqual(['alerting', 'telemetry-failed']);
expect(loggerMeta?.error?.stack_trace).toBeDefined();
@ -1451,6 +1435,72 @@ describe('event log telemetry', () => {
countRulesByExecutionStatus: {},
});
});
it('should return empty results and log debug log if query throws search_phase_execution_exception error', async () => {
esClient.search.mockRejectedValueOnce(
new errors.ResponseError({
warnings: [],
// eslint-disable-next-line @typescript-eslint/no-explicit-any
meta: {} as any,
body: {
error: {
root_cause: [],
type: 'search_phase_execution_exception',
reason: 'no_shard_available_action_exception',
phase: 'fetch',
grouped: true,
failed_shards: [],
caused_by: {
type: 'no_shard_available_action_exception',
reason: 'This is the nested reason',
},
},
},
statusCode: 503,
headers: {},
})
);
const telemetry = await getExecutionsPerDayCount({ esClient, eventLogIndex: 'test', logger });
expect(esClient.search).toHaveBeenCalledTimes(1);
const loggerCalls = loggingSystemMock.collect(logger);
expect(loggerCalls.debug).toHaveLength(2);
expect(loggerCalls.debug[0][0]).toEqual(
`query for getExecutionsPerDayCount - {\"index\":\"test\",\"size\":0,\"body\":{\"query\":{\"bool\":{\"filter\":{\"bool\":{\"must\":[{\"term\":{\"event.action\":\"execute\"}},{\"term\":{\"event.provider\":\"alerting\"}},{\"range\":{\"@timestamp\":{\"gte\":\"now-1d\"}}}]}}}},\"aggs\":{\"avg_execution_time\":{\"avg\":{\"field\":\"event.duration\"}},\"avg_es_search_duration\":{\"avg\":{\"field\":\"kibana.alert.rule.execution.metrics.es_search_duration_ms\"}},\"avg_total_search_duration\":{\"avg\":{\"field\":\"kibana.alert.rule.execution.metrics.total_search_duration_ms\"}},\"percentile_scheduled_actions\":{\"percentiles\":{\"field\":\"kibana.alert.rule.execution.metrics.number_of_generated_actions\",\"percents\":[50,90,99]}},\"percentile_alerts\":{\"percentiles\":{\"field\":\"kibana.alert.rule.execution.metrics.alert_counts.active\",\"percents\":[50,90,99]}},\"execution_failures\":{\"filter\":{\"term\":{\"event.outcome\":\"failure\"}},\"aggs\":{\"by_reason\":{\"terms\":{\"field\":\"event.reason\",\"size\":5}}}},\"by_rule_type_id\":{\"terms\":{\"field\":\"rule.category\",\"size\":33},\"aggs\":{\"avg_execution_time\":{\"avg\":{\"field\":\"event.duration\"}},\"avg_es_search_duration\":{\"avg\":{\"field\":\"kibana.alert.rule.execution.metrics.es_search_duration_ms\"}},\"avg_total_search_duration\":{\"avg\":{\"field\":\"kibana.alert.rule.execution.metrics.total_search_duration_ms\"}},\"percentile_scheduled_actions\":{\"percentiles\":{\"field\":\"kibana.alert.rule.execution.metrics.number_of_generated_actions\",\"percents\":[50,90,99]}},\"percentile_alerts\":{\"percentiles\":{\"field\":\"kibana.alert.rule.execution.metrics.alert_counts.active\",\"percents\":[50,90,99]}},\"execution_failures\":{\"filter\":{\"term\":{\"event.outcome\":\"failure\"}},\"aggs\":{\"by_reason\":{\"terms\":{\"field\":\"event.reason\",\"size\":5}}}}}},\"by_execution_status\":{\"terms\":{\"field\":\"event.outcome\"}}}}}`
);
expect(loggerCalls.debug[1][0]).toMatchInlineSnapshot(`
"Error executing alerting telemetry task: getExecutionsPerDayCount - ResponseError: search_phase_execution_exception
Caused by:
no_shard_available_action_exception: This is the nested reason"
`);
// logger meta
expect(loggerCalls.debug[1][1]?.tags).toEqual(['alerting', 'telemetry-failed']);
expect(loggerCalls.debug[1][1]?.error?.stack_trace).toBeDefined();
expect(loggerCalls.warn).toHaveLength(0);
expect(telemetry).toStrictEqual({
hasErrors: true,
errorMessage: 'no_shard_available_action_exception',
countTotalRuleExecutions: 0,
countRuleExecutionsByType: {},
countTotalFailedExecutions: 0,
countFailedExecutionsByReason: {},
countFailedExecutionsByReasonByType: {},
avgExecutionTime: 0,
avgExecutionTimeByType: {},
avgEsSearchDuration: 0,
avgEsSearchDurationByType: {},
avgTotalSearchDuration: 0,
avgTotalSearchDurationByType: {},
generatedActionsPercentiles: {},
generatedActionsPercentilesByType: {},
alertsPercentiles: {},
alertsPercentilesByType: {},
countRulesByExecutionStatus: {},
});
});
});
describe('getExecutionTimeoutsPerDayCount', () => {
@ -1458,37 +1508,16 @@ describe('event log telemetry', () => {
esClient.search.mockResponse({
took: 4,
timed_out: false,
_shards: {
total: 1,
successful: 1,
skipped: 0,
failed: 0,
},
hits: {
total: {
value: 4,
relation: 'eq',
},
max_score: null,
hits: [],
},
_shards: { total: 1, successful: 1, skipped: 0, failed: 0 },
hits: { total: { value: 4, relation: 'eq' }, max_score: null, hits: [] },
aggregations: {
by_rule_type_id: {
doc_count_error_upper_bound: 0,
sum_other_doc_count: 0,
buckets: [
{
key: '.index-threshold',
doc_count: 2,
},
{
key: 'logs.alert.document.count',
doc_count: 1,
},
{
key: 'document.test.',
doc_count: 1,
},
{ key: '.index-threshold', doc_count: 2 },
{ key: 'logs.alert.document.count', doc_count: 1 },
{ key: 'document.test.', doc_count: 1 },
],
},
},
@ -1527,7 +1556,7 @@ describe('event log telemetry', () => {
const loggerCall = logger.warn.mock.calls[0][0];
const loggerMeta = logger.warn.mock.calls[0][1];
expect(loggerCall as string).toMatchInlineSnapshot(
`"Error executing alerting telemetry task: getExecutionsTimeoutsPerDayCount - {}"`
`"Error executing alerting telemetry task: getExecutionsTimeoutsPerDayCount - Error: oh no"`
);
expect(loggerMeta?.tags).toEqual(['alerting', 'telemetry-failed']);
expect(loggerMeta?.error?.stack_trace).toBeDefined();
@ -1538,5 +1567,61 @@ describe('event log telemetry', () => {
hasErrors: true,
});
});
it('should return empty results and log debug log if query throws search_phase_execution_exception error', async () => {
esClient.search.mockRejectedValueOnce(
new errors.ResponseError({
warnings: [],
// eslint-disable-next-line @typescript-eslint/no-explicit-any
meta: {} as any,
body: {
error: {
root_cause: [],
type: 'search_phase_execution_exception',
reason: 'no_shard_available_action_exception',
phase: 'fetch',
grouped: true,
failed_shards: [],
caused_by: {
type: 'no_shard_available_action_exception',
reason: 'This is the nested reason',
},
},
},
statusCode: 503,
headers: {},
})
);
const telemetry = await getExecutionTimeoutsPerDayCount({
esClient,
eventLogIndex: 'test',
logger,
});
expect(esClient.search).toHaveBeenCalledTimes(1);
const loggerCalls = loggingSystemMock.collect(logger);
expect(loggerCalls.debug).toHaveLength(2);
expect(loggerCalls.debug[0][0]).toEqual(
`query for getExecutionTimeoutsPerDayCount - {\"index\":\"test\",\"size\":0,\"body\":{\"query\":{\"bool\":{\"filter\":{\"bool\":{\"must\":[{\"term\":{\"event.action\":\"execute-timeout\"}},{\"term\":{\"event.provider\":\"alerting\"}},{\"range\":{\"@timestamp\":{\"gte\":\"now-1d\"}}}]}}}},\"aggs\":{\"by_rule_type_id\":{\"terms\":{\"field\":\"rule.category\",\"size\":33}}}}}`
);
expect(loggerCalls.debug[1][0]).toMatchInlineSnapshot(`
"Error executing alerting telemetry task: getExecutionsTimeoutsPerDayCount - ResponseError: search_phase_execution_exception
Caused by:
no_shard_available_action_exception: This is the nested reason"
`);
// logger meta
expect(loggerCalls.debug[1][1]?.tags).toEqual(['alerting', 'telemetry-failed']);
expect(loggerCalls.debug[1][1]?.error?.stack_trace).toBeDefined();
expect(loggerCalls.warn).toHaveLength(0);
expect(telemetry).toStrictEqual({
hasErrors: true,
errorMessage: 'no_shard_available_action_exception',
countExecutionTimeouts: 0,
countExecutionTimeoutsByType: {},
});
});
});
});

@ -22,6 +22,7 @@ import {
} from '../alerting_usage_collector';
import { replaceDotSymbols } from './replace_dots_with_underscores';
import { parseSimpleRuleTypeBucket } from './parse_simple_rule_type_bucket';
import { parseAndLogError } from './parse_and_log_error';
const Millis2Nanos = 1000 * 1000;
const percentileFieldNameMapping: Record<string, string> = {
@ -189,14 +190,8 @@ export async function getExecutionsPerDayCount({
),
};
} catch (err) {
const errorMessage = err && err.message ? err.message : err.toString();
logger.warn(
`Error executing alerting telemetry task: getExecutionsPerDayCount - ${JSON.stringify(err)}`,
{
tags: ['alerting', 'telemetry-failed'],
error: { stack_trace: err.stack },
}
);
const errorMessage = parseAndLogError(err, `getExecutionsPerDayCount`, logger);
return {
hasErrors: true,
errorMessage,
@ -262,17 +257,8 @@ export async function getExecutionTimeoutsPerDayCount({
countExecutionTimeoutsByType: parseSimpleRuleTypeBucket(aggregations.by_rule_type_id.buckets),
};
} catch (err) {
const errorMessage = err && err.message ? err.message : err.toString();
const errorMessage = parseAndLogError(err, `getExecutionsTimeoutsPerDayCount`, logger);
logger.warn(
`Error executing alerting telemetry task: getExecutionsTimeoutsPerDayCount - ${JSON.stringify(
err
)}`,
{
tags: ['alerting', 'telemetry-failed'],
error: { stack_trace: err.stack },
}
);
return {
hasErrors: true,
errorMessage,

File diff suppressed because one or more lines are too long

@ -25,6 +25,7 @@ import { parseSimpleRuleTypeBucket } from './parse_simple_rule_type_bucket';
import { groupRulesBySearchType } from './group_rules_by_search_type';
import { MAINTENANCE_WINDOW_SAVED_OBJECT_TYPE } from '../../../common';
import { MaintenanceWindowAttributes } from '../../data/maintenance_window/types';
import { parseAndLogError } from './parse_and_log_error';
interface Opts {
esClient: ElasticsearchClient;
@ -376,15 +377,8 @@ export async function getTotalCountAggregations({
},
};
} catch (err) {
const errorMessage = err && err.message ? err.message : err.toString();
const errorMessage = parseAndLogError(err, `getTotalCountAggregations`, logger);
logger.warn(
`Error executing alerting telemetry task: getTotalCountAggregations - ${JSON.stringify(err)}`,
{
tags: ['alerting', 'telemetry-failed'],
error: { stack_trace: err.stack },
}
);
return {
hasErrors: true,
errorMessage,
@ -491,14 +485,8 @@ export async function getTotalCountInUse({
countNamespaces: aggregations.namespaces_count.value ?? 0,
};
} catch (err) {
const errorMessage = err && err.message ? err.message : err.toString();
logger.warn(
`Error executing alerting telemetry task: getTotalCountInUse - ${JSON.stringify(err)}`,
{
tags: ['alerting', 'telemetry-failed'],
error: { stack_trace: err.stack },
}
);
const errorMessage = parseAndLogError(err, `getTotalCountInUse`, logger);
return {
hasErrors: true,
errorMessage,
@ -548,14 +536,8 @@ export async function getMWTelemetry({
count_mw_with_filter_alert_toggle_on: countMWWithFilterAlertToggleON,
};
} catch (err) {
const errorMessage = err?.message ? err.message : err.toString();
logger.warn(
`Error executing alerting telemetry task: getTotalMWCount - ${JSON.stringify(err)}`,
{
tags: ['alerting', 'telemetry-failed'],
error: { stack_trace: err?.stack },
}
);
const errorMessage = parseAndLogError(err, `getTotalMWCount`, logger);
return {
hasErrors: true,
errorMessage,

@ -5,7 +5,9 @@
* 2.0.
*/
import { errors } from '@elastic/elasticsearch';
import { elasticsearchServiceMock, loggingSystemMock } from '@kbn/core/server/mocks';
import { MockedLogger, loggerMock } from '@kbn/logging-mocks';
import {
getFailedAndUnrecognizedTasksPerDay,
parseBucket,
@ -13,11 +15,12 @@ import {
const elasticsearch = elasticsearchServiceMock.createStart();
const esClient = elasticsearch.client.asInternalUser;
const logger: ReturnType<typeof loggingSystemMock.createLogger> = loggingSystemMock.createLogger();
let logger: MockedLogger;
describe('task manager telemetry', () => {
beforeEach(() => {
jest.resetAllMocks();
logger = loggerMock.create();
});
describe('parseBucket', () => {
@ -145,20 +148,8 @@ describe('task manager telemetry', () => {
esClient.search.mockResponse({
took: 4,
timed_out: false,
_shards: {
total: 1,
successful: 1,
skipped: 0,
failed: 0,
},
hits: {
total: {
value: 40,
relation: 'eq',
},
max_score: null,
hits: [],
},
_shards: { total: 1, successful: 1, skipped: 0, failed: 0 },
hits: { total: { value: 40, relation: 'eq' }, max_score: null, hits: [] },
aggregations: {
by_status: {
doc_count_error_upper_bound: 0,
@ -243,7 +234,7 @@ describe('task manager telemetry', () => {
const loggerCall = logger.warn.mock.calls[0][0];
const loggerMeta = logger.warn.mock.calls[0][1];
expect(loggerCall as string).toMatchInlineSnapshot(
`"Error executing alerting telemetry task: getFailedAndUnrecognizedTasksPerDay - {}"`
`"Error executing alerting telemetry task: getFailedAndUnrecognizedTasksPerDay - Error: oh no"`
);
expect(loggerMeta?.tags).toEqual(['alerting', 'telemetry-failed']);
expect(loggerMeta?.error?.stack_trace).toBeDefined();
@ -255,5 +246,62 @@ describe('task manager telemetry', () => {
countFailedAndUnrecognizedTasksByStatusByType: {},
});
});
test('should return empty results and log debug log if query throws search_phase_execution_exception error', async () => {
esClient.search.mockRejectedValueOnce(
new errors.ResponseError({
warnings: [],
// eslint-disable-next-line @typescript-eslint/no-explicit-any
meta: {} as any,
body: {
error: {
root_cause: [],
type: 'search_phase_execution_exception',
reason: 'no_shard_available_action_exception',
phase: 'fetch',
grouped: true,
failed_shards: [],
caused_by: {
type: 'no_shard_available_action_exception',
reason: 'This is the nested reason',
},
},
},
statusCode: 503,
headers: {},
})
);
const telemetry = await getFailedAndUnrecognizedTasksPerDay({
esClient,
taskManagerIndex: 'test',
logger,
});
expect(esClient.search).toHaveBeenCalledTimes(1);
const loggerCalls = loggingSystemMock.collect(logger);
expect(loggerCalls.debug).toHaveLength(2);
expect(loggerCalls.debug[0][0]).toEqual(
`query for getFailedAndUnrecognizedTasksPerDay - {\"index\":\"test\",\"size\":0,\"body\":{\"query\":{\"bool\":{\"must\":[{\"bool\":{\"should\":[{\"term\":{\"task.status\":\"unrecognized\"}},{\"term\":{\"task.status\":\"failed\"}}]}},{\"wildcard\":{\"task.taskType\":{\"value\":\"alerting:*\"}}},{\"range\":{\"task.runAt\":{\"gte\":\"now-1d\"}}}]}},\"aggs\":{\"by_status\":{\"terms\":{\"field\":\"task.status\",\"size\":10},\"aggs\":{\"by_task_type\":{\"terms\":{\"field\":\"task.taskType\",\"size\":33}}}}}}}`
);
expect(loggerCalls.debug[1][0]).toMatchInlineSnapshot(`
"Error executing alerting telemetry task: getFailedAndUnrecognizedTasksPerDay - ResponseError: search_phase_execution_exception
Caused by:
no_shard_available_action_exception: This is the nested reason"
`);
// logger meta
expect(loggerCalls.debug[1][1]?.tags).toEqual(['alerting', 'telemetry-failed']);
expect(loggerCalls.debug[1][1]?.error?.stack_trace).toBeDefined();
expect(loggerCalls.warn).toHaveLength(0);
expect(telemetry).toStrictEqual({
errorMessage: 'no_shard_available_action_exception',
hasErrors: true,
countFailedAndUnrecognizedTasks: 0,
countFailedAndUnrecognizedTasksByStatus: {},
countFailedAndUnrecognizedTasksByStatusByType: {},
});
});
});
});

@ -14,6 +14,7 @@ import type {
import { ElasticsearchClient, Logger } from '@kbn/core/server';
import { replaceDotSymbols } from './replace_dots_with_underscores';
import { NUM_ALERTING_RULE_TYPES } from '../alerting_usage_collector';
import { parseAndLogError } from './parse_and_log_error';
interface Opts {
esClient: ElasticsearchClient;
@ -122,16 +123,8 @@ export async function getFailedAndUnrecognizedTasksPerDay({
countFailedAndUnrecognizedTasks: totalFailedAndUnrecognizedTasks ?? 0,
};
} catch (err) {
const errorMessage = err && err.message ? err.message : err.toString();
logger.warn(
`Error executing alerting telemetry task: getFailedAndUnrecognizedTasksPerDay - ${JSON.stringify(
err
)}`,
{
tags: ['alerting', 'telemetry-failed'],
error: { stack_trace: err.stack },
}
);
const errorMessage = parseAndLogError(err, `getFailedAndUnrecognizedTasksPerDay`, logger);
return {
hasErrors: true,
errorMessage,

@ -0,0 +1,34 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { Logger } from '@kbn/core/server';
export function parseAndLogError(err: Error, errType: string, logger: Logger): string {
const errorMessage = err && err.message ? err.message : err.toString();
let returnedErrorMessage = errorMessage;
const errorStr = JSON.stringify(err);
const logMessage = `Error executing alerting telemetry task: ${errType} - ${err}`;
const logOptions = {
tags: ['alerting', 'telemetry-failed'],
error: { stack_trace: err.stack },
};
// If error string contains "no_shard_available_action_exception", debug log it
if (errorStr.includes('no_shard_available_action_exception')) {
// the no_shard_available_action_exception can be wordy and the error message returned from this function
// gets stored in the task state so lets simplify
returnedErrorMessage = 'no_shard_available_action_exception';
if (logger.isLevelEnabled('debug')) {
logger.debug(logMessage, logOptions);
}
} else {
logger.warn(logMessage, logOptions);
}
return returnedErrorMessage;
}