[8.x] [Response Ops][Alerting] Switch to `logger.warn` for `no_shard_available_action_exception` encountered when collecting telemetry (#205520) (#209323)
# Backport

This will backport the following commits from `main` to `8.x`:

- [[Response Ops][Alerting] Switch to `logger.warn` for `no_shard_available_action_exception` encountered when collecting telemetry (#205520)](https://github.com/elastic/kibana/pull/205520)

Backports commit 4e0c0a785ad8d8d6df66406542acda1189e2b817 (authored by Ying Mao, 2025-02-03T14:44:03Z) from `main` (v9.1.0) to `8.x` (v8.19.0). Backport tool version: 9.4.3. Source PR labels: Feature:Alerting, Feature:Actions, Team:ResponseOps, release_note:skip, backport:prev-minor, ci:project-deploy-security, v9.1.0, v8.19.0.

### Questions?

Please refer to the [Backport tool documentation](https://github.com/sqren/backport).

---

Resolves https://github.com/elastic/response-ops-team/issues/221

## Summary

Looking through the logs for this, the majority of these errors have the following stack trace (14,000+ in the last 7 days):

```
ResponseError: search_phase_execution_exception
	Root causes:
		no_shard_available_action_exception: [es-es-search-686bf6b747-xldl8][100.65.77.183:9300][indices:data/read/search[phase/query]]
		no_shard_available_action_exception: null
    at KibanaTransport._request (/usr/share/kibana/node_modules/@elastic/transport/lib/Transport.js:543:27)
    at processTicksAndRejections (node:internal/process/task_queues:95:5)
    at /usr/share/kibana/node_modules/@elastic/transport/lib/Transport.js:641:32
    at KibanaTransport.request (/usr/share/kibana/node_modules/@elastic/transport/lib/Transport.js:637:20)
    at KibanaTransport.request (/usr/share/kibana/node_modules/@kbn/core-elasticsearch-client-server-internal/src/create_transport.js:60:16)
    at ClientTraced.SearchApi [as search] (/usr/share/kibana/node_modules/@elastic/elasticsearch/lib/api/api/search.js:72:12)
    at getTotalAlertsCountAggregations (/usr/share/kibana/node_modules/@kbn/alerting-plugin/server/usage/lib/get_telemetry_from_alerts.js:42:21)
    at async Promise.all (index 6)
    at TaskManagerRunner.run (/usr/share/kibana/node_modules/@kbn/task-manager-plugin/server/task_running/task_runner.js:325:22)
```

Looking through the code, we already catch these errors and return a default response for that telemetry object. The `no_shard_available_action_exception` is not an issue with Kibana; it is an ES issue, so this PR catches these types of errors and logs them at debug level instead of warn level to avoid polluting the logs with errors we have no control over.

Excluding those results, we see a different and less frequent stack trace (100+ in the last 15 days):

```
TypeError: Cannot read properties of undefined (reading 'by_rule_type_id')
    at getTotalAlertsCountAggregations (/usr/share/kibana/node_modules/@kbn/alerting-plugin/server/usage/lib/get_telemetry_from_alerts.js:49:109)
    at processTicksAndRejections (node:internal/process/task_queues:95:5)
    at async Promise.all (index 6)
    at TaskManagerRunner.run (/usr/share/kibana/node_modules/@kbn/task-manager-plugin/server/task_running/task_runner.js:325:22)
```

For actions telemetry, the volume of these errors is much lower. Unfortunately, we are not logging the stack trace for these errors, so it is harder to track down the source. I've updated the code to store the stack trace as well, and added the same handling for `no_shard_available_action_exception` under the assumption that it could also be getting these errors.

Co-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com>
Co-authored-by: Ying Mao <ying.mao@elastic.co>
Parent commit: d77bab7c6b
This commit: b1e03010ab
12 changed files with 823 additions and 378 deletions
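The diff below boils down to one decision: classify the caught error before picking a log level, and shorten the message that gets stored in task state. A condensed sketch of that decision follows; the names here are illustrative, not the ones added by the PR (the real helper is `parseAndLogError` in the new `parse_and_log_error.ts` files further down):

```ts
import type { Logger } from '@kbn/core/server';

// Condensed sketch of the handling described above. `taskName` would be
// something like 'getTotalAlertsCountAggregations'.
export function logTelemetryError(err: Error, taskName: string, logger: Logger): string {
  const logMessage = `Error executing alerting telemetry task: ${taskName} - ${err}`;
  const logOptions = {
    tags: ['alerting', 'telemetry-failed'],
    error: { stack_trace: err.stack },
  };

  // Shard-availability problems are an ES-side condition we cannot act on,
  // so they only get a debug log; everything else stays at warn.
  if (JSON.stringify(err).includes('no_shard_available_action_exception')) {
    logger.debug(logMessage, logOptions);
    // Keep the message stored in task state short.
    return 'no_shard_available_action_exception';
  }
  logger.warn(logMessage, logOptions);
  return err.message ?? String(err);
}
```

The helper added by the PR additionally wraps the debug call in a `logger.isLevelEnabled('debug')` check, as shown in the diff below.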
|
@ -6,7 +6,9 @@
|
|||
*/
|
||||
|
||||
import { elasticsearchClientMock } from '@kbn/core-elasticsearch-client-server-mocks';
|
||||
import { MockedLogger, loggerMock } from '@kbn/logging-mocks';
|
||||
import { loggingSystemMock } from '@kbn/core/server/mocks';
|
||||
import { errors } from '@elastic/elasticsearch';
|
||||
import {
|
||||
getCounts,
|
||||
getExecutionsPerDayCount,
|
||||
|
@ -14,9 +16,13 @@ import {
|
|||
getTotalCount,
|
||||
} from './actions_telemetry';
|
||||
|
||||
const mockLogger = loggingSystemMock.create().get();
|
||||
let logger: MockedLogger;
|
||||
|
||||
describe('actions telemetry', () => {
|
||||
beforeEach(() => {
|
||||
logger = loggerMock.create();
|
||||
});
|
||||
|
||||
test('getTotalCount should replace first symbol . to __ for action types names', async () => {
|
||||
const mockEsClient = elasticsearchClientMock.createClusterClient().asScoped().asInternalUser;
|
||||
mockEsClient.search.mockResponse(
|
||||
|
@ -107,7 +113,7 @@ describe('actions telemetry', () => {
|
|||
},
|
||||
}
|
||||
);
|
||||
const telemetry = await getTotalCount(mockEsClient, 'test', mockLogger);
|
||||
const telemetry = await getTotalCount(mockEsClient, 'test', logger);
|
||||
|
||||
expect(mockEsClient.search).toHaveBeenCalledTimes(1);
|
||||
|
||||
|
@ -126,16 +132,24 @@ describe('actions telemetry', () => {
|
|||
`);
|
||||
});
|
||||
|
||||
test('getTotalCount should return empty results if query throws error', async () => {
|
||||
test('getTotalCount should return empty results and log warning if query throws error', async () => {
|
||||
const mockEsClient = elasticsearchClientMock.createClusterClient().asScoped().asInternalUser;
|
||||
mockEsClient.search.mockRejectedValue(new Error('oh no'));
|
||||
|
||||
const telemetry = await getTotalCount(mockEsClient, 'test', mockLogger);
|
||||
const telemetry = await getTotalCount(mockEsClient, 'test', logger);
|
||||
|
||||
expect(mockEsClient.search).toHaveBeenCalledTimes(1);
|
||||
expect(mockLogger.warn).toHaveBeenCalledWith(
|
||||
`Error executing actions telemetry task: getTotalCount - {}`
|
||||
|
||||
const loggerCalls = loggingSystemMock.collect(logger);
|
||||
expect(loggerCalls.debug).toHaveLength(0);
|
||||
expect(loggerCalls.warn).toHaveLength(1);
|
||||
expect(loggerCalls.warn[0][0]).toEqual(
|
||||
`Error executing actions telemetry task: getTotalCount - Error: oh no`
|
||||
);
|
||||
// logger meta
|
||||
expect(loggerCalls.warn[0][1]?.tags).toEqual(['actions', 'telemetry-failed']);
|
||||
expect(loggerCalls.warn[0][1]?.error?.stack_trace).toBeDefined();
|
||||
|
||||
expect(telemetry).toMatchInlineSnapshot(`
|
||||
Object {
|
||||
"countByType": Object {},
|
||||
|
@ -147,6 +161,60 @@ describe('actions telemetry', () => {
|
|||
`);
|
||||
});
|
||||
|
||||
test('getTotalCount should return empty results and log debug if query throws search_phase_execution_exception error', async () => {
|
||||
const mockEsClient = elasticsearchClientMock.createClusterClient().asScoped().asInternalUser;
|
||||
mockEsClient.search.mockRejectedValueOnce(
|
||||
new errors.ResponseError({
|
||||
warnings: [],
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
meta: {} as any,
|
||||
body: {
|
||||
error: {
|
||||
root_cause: [],
|
||||
type: 'search_phase_execution_exception',
|
||||
reason: 'no_shard_available_action_exception',
|
||||
phase: 'fetch',
|
||||
grouped: true,
|
||||
failed_shards: [],
|
||||
caused_by: {
|
||||
type: 'no_shard_available_action_exception',
|
||||
reason: 'This is the nested reason',
|
||||
},
|
||||
},
|
||||
},
|
||||
statusCode: 503,
|
||||
headers: {},
|
||||
})
|
||||
);
|
||||
|
||||
const telemetry = await getTotalCount(mockEsClient, 'test', logger);
|
||||
|
||||
expect(mockEsClient.search).toHaveBeenCalledTimes(1);
|
||||
|
||||
const loggerCalls = loggingSystemMock.collect(logger);
|
||||
expect(loggerCalls.debug).toHaveLength(1);
|
||||
expect(loggerCalls.debug[0][0]).toMatchInlineSnapshot(`
|
||||
"Error executing actions telemetry task: getTotalCount - ResponseError: search_phase_execution_exception
|
||||
Caused by:
|
||||
no_shard_available_action_exception: This is the nested reason"
|
||||
`);
|
||||
// logger meta
|
||||
expect(loggerCalls.debug[0][1]?.tags).toEqual(['actions', 'telemetry-failed']);
|
||||
expect(loggerCalls.debug[0][1]?.error?.stack_trace).toBeDefined();
|
||||
|
||||
expect(loggerCalls.warn).toHaveLength(0);
|
||||
|
||||
expect(telemetry).toMatchInlineSnapshot(`
|
||||
Object {
|
||||
"countByType": Object {},
|
||||
"countGenAiProviderTypes": Object {},
|
||||
"countTotal": 0,
|
||||
"errorMessage": "no_shard_available_action_exception",
|
||||
"hasErrors": true,
|
||||
}
|
||||
`);
|
||||
});
|
||||
|
||||
test('getInUseTotalCount', async () => {
|
||||
const mockEsClient = elasticsearchClientMock.createClusterClient().asScoped().asInternalUser;
|
||||
mockEsClient.search.mockResponseOnce(
|
||||
|
@ -202,7 +270,7 @@ describe('actions telemetry', () => {
|
|||
],
|
||||
},
|
||||
});
|
||||
const telemetry = await getInUseTotalCount(mockEsClient, 'test', mockLogger);
|
||||
const telemetry = await getInUseTotalCount(mockEsClient, 'test', logger);
|
||||
|
||||
expect(mockEsClient.search).toHaveBeenCalledTimes(2);
|
||||
expect(telemetry).toMatchInlineSnapshot(`
|
||||
|
@ -287,7 +355,7 @@ describe('actions telemetry', () => {
|
|||
],
|
||||
},
|
||||
});
|
||||
const telemetry = await getInUseTotalCount(mockEsClient, 'test', mockLogger, undefined, [
|
||||
const telemetry = await getInUseTotalCount(mockEsClient, 'test', logger, undefined, [
|
||||
{
|
||||
id: 'test',
|
||||
actionTypeId: '.email',
|
||||
|
@ -332,16 +400,23 @@ describe('actions telemetry', () => {
|
|||
`);
|
||||
});
|
||||
|
||||
test('getInUseTotalCount should return empty results if query throws error', async () => {
|
||||
test('getInUseTotalCount should return empty results and log warning if query throws error', async () => {
|
||||
const mockEsClient = elasticsearchClientMock.createClusterClient().asScoped().asInternalUser;
|
||||
mockEsClient.search.mockRejectedValue(new Error('oh no'));
|
||||
|
||||
const telemetry = await getInUseTotalCount(mockEsClient, 'test', mockLogger);
|
||||
const telemetry = await getInUseTotalCount(mockEsClient, 'test', logger);
|
||||
|
||||
expect(mockEsClient.search).toHaveBeenCalledTimes(1);
|
||||
expect(mockLogger.warn).toHaveBeenCalledWith(
|
||||
`Error executing actions telemetry task: getInUseTotalCount - {}`
|
||||
const loggerCalls = loggingSystemMock.collect(logger);
|
||||
expect(loggerCalls.debug).toHaveLength(0);
|
||||
expect(loggerCalls.warn).toHaveLength(1);
|
||||
expect(loggerCalls.warn[0][0]).toEqual(
|
||||
`Error executing actions telemetry task: getInUseTotalCount - Error: oh no`
|
||||
);
|
||||
// logger meta
|
||||
expect(loggerCalls.warn[0][1]?.tags).toEqual(['actions', 'telemetry-failed']);
|
||||
expect(loggerCalls.warn[0][1]?.error?.stack_trace).toBeDefined();
|
||||
|
||||
expect(telemetry).toMatchInlineSnapshot(`
|
||||
Object {
|
||||
"countByAlertHistoryConnectorType": 0,
|
||||
|
@ -355,6 +430,62 @@ describe('actions telemetry', () => {
|
|||
`);
|
||||
});
|
||||
|
||||
test('getInUseTotalCount should return empty results and log debug if query throws search_phase_execution_exception error', async () => {
|
||||
const mockEsClient = elasticsearchClientMock.createClusterClient().asScoped().asInternalUser;
|
||||
mockEsClient.search.mockRejectedValueOnce(
|
||||
new errors.ResponseError({
|
||||
warnings: [],
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
meta: {} as any,
|
||||
body: {
|
||||
error: {
|
||||
root_cause: [],
|
||||
type: 'search_phase_execution_exception',
|
||||
reason: 'no_shard_available_action_exception',
|
||||
phase: 'fetch',
|
||||
grouped: true,
|
||||
failed_shards: [],
|
||||
caused_by: {
|
||||
type: 'no_shard_available_action_exception',
|
||||
reason: 'This is the nested reason',
|
||||
},
|
||||
},
|
||||
},
|
||||
statusCode: 503,
|
||||
headers: {},
|
||||
})
|
||||
);
|
||||
|
||||
const telemetry = await getInUseTotalCount(mockEsClient, 'test', logger);
|
||||
|
||||
expect(mockEsClient.search).toHaveBeenCalledTimes(1);
|
||||
|
||||
const loggerCalls = loggingSystemMock.collect(logger);
|
||||
expect(loggerCalls.debug).toHaveLength(1);
|
||||
expect(loggerCalls.debug[0][0]).toMatchInlineSnapshot(`
|
||||
"Error executing actions telemetry task: getInUseTotalCount - ResponseError: search_phase_execution_exception
|
||||
Caused by:
|
||||
no_shard_available_action_exception: This is the nested reason"
|
||||
`);
|
||||
// logger meta
|
||||
expect(loggerCalls.debug[0][1]?.tags).toEqual(['actions', 'telemetry-failed']);
|
||||
expect(loggerCalls.debug[0][1]?.error?.stack_trace).toBeDefined();
|
||||
|
||||
expect(loggerCalls.warn).toHaveLength(0);
|
||||
|
||||
expect(telemetry).toMatchInlineSnapshot(`
|
||||
Object {
|
||||
"countByAlertHistoryConnectorType": 0,
|
||||
"countByType": Object {},
|
||||
"countEmailByService": Object {},
|
||||
"countNamespaces": 0,
|
||||
"countTotal": 0,
|
||||
"errorMessage": "no_shard_available_action_exception",
|
||||
"hasErrors": true,
|
||||
}
|
||||
`);
|
||||
});
|
||||
|
||||
test('getTotalCount accounts for preconfigured connectors', async () => {
|
||||
const mockEsClient = elasticsearchClientMock.createClusterClient().asScoped().asInternalUser;
|
||||
mockEsClient.search.mockResponse(
|
||||
|
@ -445,7 +576,7 @@ describe('actions telemetry', () => {
|
|||
},
|
||||
}
|
||||
);
|
||||
const telemetry = await getTotalCount(mockEsClient, 'test', mockLogger, [
|
||||
const telemetry = await getTotalCount(mockEsClient, 'test', logger, [
|
||||
{
|
||||
id: 'test',
|
||||
actionTypeId: '.test',
|
||||
|
@ -501,7 +632,7 @@ describe('actions telemetry', () => {
|
|||
},
|
||||
}
|
||||
);
|
||||
const telemetry = await getTotalCount(mockEsClient, 'test', mockLogger, [
|
||||
const telemetry = await getTotalCount(mockEsClient, 'test', logger, [
|
||||
{
|
||||
id: 'system_action:system-connector-test.system-action',
|
||||
actionTypeId: 'test.system-action',
|
||||
|
@ -615,7 +746,7 @@ describe('actions telemetry', () => {
|
|||
],
|
||||
},
|
||||
});
|
||||
const telemetry = await getInUseTotalCount(mockEsClient, 'test', mockLogger, undefined, [
|
||||
const telemetry = await getInUseTotalCount(mockEsClient, 'test', logger, undefined, [
|
||||
{
|
||||
id: 'anotherServerLog',
|
||||
actionTypeId: '.server-log',
|
||||
|
@ -721,7 +852,7 @@ describe('actions telemetry', () => {
|
|||
},
|
||||
});
|
||||
|
||||
const telemetry = await getInUseTotalCount(mockEsClient, 'test', mockLogger, undefined, []);
|
||||
const telemetry = await getInUseTotalCount(mockEsClient, 'test', logger, undefined, []);
|
||||
|
||||
expect(mockEsClient.search).toHaveBeenCalledTimes(2);
|
||||
expect(telemetry).toMatchInlineSnapshot(`
|
||||
|
@ -827,7 +958,7 @@ describe('actions telemetry', () => {
|
|||
],
|
||||
},
|
||||
});
|
||||
const telemetry = await getInUseTotalCount(mockEsClient, 'test', mockLogger);
|
||||
const telemetry = await getInUseTotalCount(mockEsClient, 'test', logger);
|
||||
|
||||
expect(mockEsClient.search).toHaveBeenCalledTimes(2);
|
||||
expect(telemetry).toMatchInlineSnapshot(`
|
||||
|
@ -961,7 +1092,7 @@ describe('actions telemetry', () => {
|
|||
},
|
||||
}
|
||||
);
|
||||
const telemetry = await getExecutionsPerDayCount(mockEsClient, 'test', mockLogger);
|
||||
const telemetry = await getExecutionsPerDayCount(mockEsClient, 'test', logger);
|
||||
|
||||
expect(mockEsClient.search).toHaveBeenCalledTimes(1);
|
||||
expect(telemetry).toStrictEqual({
|
||||
|
@ -995,16 +1126,24 @@ describe('actions telemetry', () => {
|
|||
});
|
||||
});
|
||||
|
||||
test('getExecutionsPerDayCount should return empty results if query throws error', async () => {
|
||||
test('getExecutionsPerDayCount should return empty results and log warning if query throws error', async () => {
|
||||
const mockEsClient = elasticsearchClientMock.createClusterClient().asScoped().asInternalUser;
|
||||
mockEsClient.search.mockRejectedValue(new Error('oh no'));
|
||||
|
||||
const telemetry = await getExecutionsPerDayCount(mockEsClient, 'test', mockLogger);
|
||||
const telemetry = await getExecutionsPerDayCount(mockEsClient, 'test', logger);
|
||||
|
||||
expect(mockEsClient.search).toHaveBeenCalledTimes(1);
|
||||
expect(mockLogger.warn).toHaveBeenCalledWith(
|
||||
`Error executing actions telemetry task: getExecutionsPerDayCount - {}`
|
||||
|
||||
const loggerCalls = loggingSystemMock.collect(logger);
|
||||
expect(loggerCalls.debug).toHaveLength(0);
|
||||
expect(loggerCalls.warn).toHaveLength(1);
|
||||
expect(loggerCalls.warn[0][0]).toEqual(
|
||||
`Error executing actions telemetry task: getExecutionsPerDayCount - Error: oh no`
|
||||
);
|
||||
// logger meta
|
||||
expect(loggerCalls.warn[0][1]?.tags).toEqual(['actions', 'telemetry-failed']);
|
||||
expect(loggerCalls.warn[0][1]?.error?.stack_trace).toBeDefined();
|
||||
|
||||
expect(telemetry).toMatchInlineSnapshot(`
|
||||
Object {
|
||||
"avgExecutionTime": 0,
|
||||
|
@ -1020,6 +1159,64 @@ describe('actions telemetry', () => {
|
|||
`);
|
||||
});
|
||||
|
||||
test('getExecutionsPerDayCount should return empty results and log debug if query throws search_phase_execution_exception error', async () => {
|
||||
const mockEsClient = elasticsearchClientMock.createClusterClient().asScoped().asInternalUser;
|
||||
mockEsClient.search.mockRejectedValueOnce(
|
||||
new errors.ResponseError({
|
||||
warnings: [],
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
meta: {} as any,
|
||||
body: {
|
||||
error: {
|
||||
root_cause: [],
|
||||
type: 'search_phase_execution_exception',
|
||||
reason: 'no_shard_available_action_exception',
|
||||
phase: 'fetch',
|
||||
grouped: true,
|
||||
failed_shards: [],
|
||||
caused_by: {
|
||||
type: 'no_shard_available_action_exception',
|
||||
reason: 'This is the nested reason',
|
||||
},
|
||||
},
|
||||
},
|
||||
statusCode: 503,
|
||||
headers: {},
|
||||
})
|
||||
);
|
||||
|
||||
const telemetry = await getExecutionsPerDayCount(mockEsClient, 'test', logger);
|
||||
|
||||
expect(mockEsClient.search).toHaveBeenCalledTimes(1);
|
||||
|
||||
const loggerCalls = loggingSystemMock.collect(logger);
|
||||
expect(loggerCalls.debug).toHaveLength(1);
|
||||
expect(loggerCalls.debug[0][0]).toMatchInlineSnapshot(`
|
||||
"Error executing actions telemetry task: getExecutionsPerDayCount - ResponseError: search_phase_execution_exception
|
||||
Caused by:
|
||||
no_shard_available_action_exception: This is the nested reason"
|
||||
`);
|
||||
// logger meta
|
||||
expect(loggerCalls.debug[0][1]?.tags).toEqual(['actions', 'telemetry-failed']);
|
||||
expect(loggerCalls.debug[0][1]?.error?.stack_trace).toBeDefined();
|
||||
|
||||
expect(loggerCalls.warn).toHaveLength(0);
|
||||
|
||||
expect(telemetry).toMatchInlineSnapshot(`
|
||||
Object {
|
||||
"avgExecutionTime": 0,
|
||||
"avgExecutionTimeByType": Object {},
|
||||
"countByType": Object {},
|
||||
"countFailed": 0,
|
||||
"countFailedByType": Object {},
|
||||
"countRunOutcomeByConnectorType": Object {},
|
||||
"countTotal": 0,
|
||||
"errorMessage": "no_shard_available_action_exception",
|
||||
"hasErrors": true,
|
||||
}
|
||||
`);
|
||||
});
|
||||
|
||||
it('getCounts', () => {
|
||||
const aggs = {
|
||||
'.d3security': 2,
|
||||
|
|
|
@@ -20,6 +20,7 @@ import {
  getActionExecutions,
  getActionsCount,
} from './lib/actions_telemetry_util';
import { parseAndLogError } from './lib/parse_and_log_error';

export interface InMemoryAggRes {
  total: number;
@@ -108,9 +109,7 @@ export async function getTotalCount(
      countGenAiProviderTypes,
    };
  } catch (err) {
    const errorMessage = err && err.message ? err.message : err.toString();

    logger.warn(`Error executing actions telemetry task: getTotalCount - ${JSON.stringify(err)}`);
    const errorMessage = parseAndLogError(err, `getTotalCount`, logger);

    return {
      hasErrors: true,
@@ -387,11 +386,8 @@ export async function getInUseTotalCount(
      countNamespaces: namespacesList.size,
    };
  } catch (err) {
    const errorMessage = err && err.message ? err.message : err.toString();
    const errorMessage = parseAndLogError(err, `getInUseTotalCount`, logger);

    logger.warn(
      `Error executing actions telemetry task: getInUseTotalCount - ${JSON.stringify(err)}`
    );
    return {
      hasErrors: true,
      errorMessage,
@@ -657,10 +653,8 @@ export async function getExecutionsPerDayCount(
      ),
    };
  } catch (err) {
    const errorMessage = err && err.message ? err.message : err.toString();
    logger.warn(
      `Error executing actions telemetry task: getExecutionsPerDayCount - ${JSON.stringify(err)}`
    );
    const errorMessage = parseAndLogError(err, `getExecutionsPerDayCount`, logger);

    return {
      hasErrors: true,
      errorMessage,
@@ -0,0 +1,34 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { Logger } from '@kbn/core/server';

export function parseAndLogError(err: Error, errType: string, logger: Logger): string {
  const errorMessage = err && err.message ? err.message : err.toString();
  let returnedErrorMessage = errorMessage;

  const errorStr = JSON.stringify(err);
  const logMessage = `Error executing actions telemetry task: ${errType} - ${err}`;
  const logOptions = {
    tags: ['actions', 'telemetry-failed'],
    error: { stack_trace: err.stack },
  };

  // If error string contains "no_shard_available_action_exception", debug log it
  if (errorStr.includes('no_shard_available_action_exception')) {
    // the no_shard_available_action_exception can be wordy and the error message returned from this function
    // gets stored in the task state so lets simplify
    returnedErrorMessage = 'no_shard_available_action_exception';
    if (logger.isLevelEnabled('debug')) {
      logger.debug(logMessage, logOptions);
    }
  } else {
    logger.warn(logMessage, logOptions);
  }

  return returnedErrorMessage;
}
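For context on how this helper is consumed: the collectors in the hunks above now delegate their catch blocks to it and keep returning an empty, error-flagged telemetry object. A hedged sketch of that pattern, with simplified shapes (the real `getTotalCount` also runs aggregations and maps the buckets):

```ts
import type { ElasticsearchClient, Logger } from '@kbn/core/server';
import { parseAndLogError } from './lib/parse_and_log_error';

// Sketch of the collector pattern shown in the hunks above: on failure, delegate
// logging to parseAndLogError and return an empty, error-flagged telemetry object
// so the usage-collection task still completes. The result shape is simplified.
export async function getTotalCountSketch(
  esClient: ElasticsearchClient,
  index: string,
  logger: Logger
): Promise<{ hasErrors: boolean; errorMessage?: string; countTotal: number }> {
  try {
    // The real collector runs an aggregation query here and folds the buckets
    // into the telemetry object; that part is unchanged by this PR.
    await esClient.search({ index, size: 0 });
    return { hasErrors: false, countTotal: 0 };
  } catch (err) {
    const errorMessage = parseAndLogError(err, `getTotalCountSketch`, logger);
    return { hasErrors: true, errorMessage, countTotal: 0 };
  }
}
```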
@ -6,64 +6,50 @@
|
|||
*/
|
||||
|
||||
import { elasticsearchServiceMock, loggingSystemMock } from '@kbn/core/server/mocks';
|
||||
import { MockedLogger, loggerMock } from '@kbn/logging-mocks';
|
||||
import { getTotalAlertsCountAggregations } from './get_telemetry_from_alerts';
|
||||
import { errors } from '@elastic/elasticsearch';
|
||||
|
||||
const elasticsearch = elasticsearchServiceMock.createStart();
|
||||
const esClient = elasticsearch.client.asInternalUser;
|
||||
const logger: ReturnType<typeof loggingSystemMock.createLogger> = loggingSystemMock.createLogger();
|
||||
let logger: MockedLogger;
|
||||
|
||||
describe('kibana index telemetry', () => {
|
||||
beforeEach(() => {
|
||||
jest.resetAllMocks();
|
||||
logger = loggerMock.create();
|
||||
});
|
||||
|
||||
it('should return total alert couts and alert counts by rule type id', async () => {
|
||||
it('should return total alert counts and alert counts by rule type id', async () => {
|
||||
esClient.search.mockResponseOnce({
|
||||
took: 4,
|
||||
timed_out: false,
|
||||
_shards: {
|
||||
total: 1,
|
||||
successful: 1,
|
||||
skipped: 0,
|
||||
failed: 0,
|
||||
},
|
||||
hits: {
|
||||
total: {
|
||||
value: 6,
|
||||
relation: 'eq',
|
||||
},
|
||||
max_score: null,
|
||||
hits: [],
|
||||
},
|
||||
_shards: { total: 1, successful: 1, skipped: 0, failed: 0 },
|
||||
hits: { total: { value: 6, relation: 'eq' }, max_score: null, hits: [] },
|
||||
aggregations: {
|
||||
by_rule_type_id: {
|
||||
doc_count_error_upper_bound: 0,
|
||||
sum_other_doc_count: 0,
|
||||
buckets: [
|
||||
{
|
||||
key: '.index-threshold',
|
||||
doc_count: 1,
|
||||
},
|
||||
{
|
||||
key: 'logs.alert.document.count',
|
||||
doc_count: 2,
|
||||
},
|
||||
{
|
||||
key: 'document.test.',
|
||||
doc_count: 3,
|
||||
},
|
||||
{ key: '.index-threshold', doc_count: 1 },
|
||||
{ key: 'logs.alert.document.count', doc_count: 2 },
|
||||
{ key: 'document.test.', doc_count: 3 },
|
||||
],
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
const telemetry = await getTotalAlertsCountAggregations({
|
||||
esClient,
|
||||
logger,
|
||||
});
|
||||
const telemetry = await getTotalAlertsCountAggregations({ esClient, logger });
|
||||
|
||||
expect(esClient.search).toHaveBeenCalledTimes(1);
|
||||
expect(logger.debug).toHaveBeenCalledTimes(2);
|
||||
const debugLogs = loggingSystemMock.collect(logger).debug;
|
||||
expect(debugLogs).toHaveLength(2);
|
||||
expect(debugLogs[0][0]).toEqual(
|
||||
`query for getTotalAlertsCountAggregations - {\"index\":\".alerts-*\",\"size\":0,\"body\":{\"query\":{\"match_all\":{}},\"aggs\":{\"by_rule_type_id\":{\"terms\":{\"field\":\"kibana.alert.rule.rule_type_id\",\"size\":33}}}}}`
|
||||
);
|
||||
expect(debugLogs[1][0]).toEqual(
|
||||
`results for getTotalAlertsCountAggregations query - {\"took\":4,\"timed_out\":false,\"_shards\":{\"total\":1,\"successful\":1,\"skipped\":0,\"failed\":0},\"hits\":{\"total\":{\"value\":6,\"relation\":\"eq\"},\"max_score\":null,\"hits\":[]},\"aggregations\":{\"by_rule_type_id\":{\"doc_count_error_upper_bound\":0,\"sum_other_doc_count\":0,\"buckets\":[{\"key\":\".index-threshold\",\"doc_count\":1},{\"key\":\"logs.alert.document.count\",\"doc_count\":2},{\"key\":\"document.test.\",\"doc_count\":3}]}}}`
|
||||
);
|
||||
|
||||
expect(telemetry).toEqual({
|
||||
hasErrors: false,
|
||||
|
@ -77,37 +63,18 @@ describe('kibana index telemetry', () => {
|
|||
});
|
||||
});
|
||||
|
||||
it('should return ', async () => {
|
||||
it('should return on empty results', async () => {
|
||||
esClient.search.mockResponseOnce({
|
||||
took: 4,
|
||||
timed_out: false,
|
||||
_shards: {
|
||||
total: 1,
|
||||
successful: 1,
|
||||
skipped: 0,
|
||||
failed: 0,
|
||||
},
|
||||
hits: {
|
||||
total: {
|
||||
value: 0,
|
||||
relation: 'eq',
|
||||
},
|
||||
max_score: null,
|
||||
hits: [],
|
||||
},
|
||||
_shards: { total: 1, successful: 1, skipped: 0, failed: 0 },
|
||||
hits: { total: { value: 0, relation: 'eq' }, max_score: null, hits: [] },
|
||||
aggregations: {
|
||||
by_rule_type_id: {
|
||||
doc_count_error_upper_bound: 0,
|
||||
sum_other_doc_count: 0,
|
||||
buckets: [],
|
||||
},
|
||||
by_rule_type_id: { doc_count_error_upper_bound: 0, sum_other_doc_count: 0, buckets: [] },
|
||||
},
|
||||
});
|
||||
|
||||
const telemetry = await getTotalAlertsCountAggregations({
|
||||
esClient,
|
||||
logger,
|
||||
});
|
||||
const telemetry = await getTotalAlertsCountAggregations({ esClient, logger });
|
||||
|
||||
expect(telemetry).toEqual({
|
||||
hasErrors: false,
|
||||
|
@ -116,17 +83,25 @@ describe('kibana index telemetry', () => {
|
|||
});
|
||||
});
|
||||
|
||||
test('should return empty results and log warning if query throws error', async () => {
|
||||
it('should return empty results and log warning if query throws error', async () => {
|
||||
esClient.search.mockRejectedValueOnce(new Error('test'));
|
||||
|
||||
const telemetry = await getTotalAlertsCountAggregations({
|
||||
esClient,
|
||||
logger,
|
||||
});
|
||||
const telemetry = await getTotalAlertsCountAggregations({ esClient, logger });
|
||||
|
||||
expect(esClient.search).toHaveBeenCalledTimes(1);
|
||||
expect(logger.debug).toHaveBeenCalledTimes(1);
|
||||
expect(logger.warn).toHaveBeenCalledTimes(1);
|
||||
|
||||
const loggerCalls = loggingSystemMock.collect(logger);
|
||||
expect(loggerCalls.debug).toHaveLength(1);
|
||||
expect(loggerCalls.debug[0][0]).toEqual(
|
||||
`query for getTotalAlertsCountAggregations - {\"index\":\".alerts-*\",\"size\":0,\"body\":{\"query\":{\"match_all\":{}},\"aggs\":{\"by_rule_type_id\":{\"terms\":{\"field\":\"kibana.alert.rule.rule_type_id\",\"size\":33}}}}}`
|
||||
);
|
||||
expect(loggerCalls.warn).toHaveLength(1);
|
||||
expect(loggerCalls.warn[0][0]).toEqual(
|
||||
`Error executing alerting telemetry task: getTotalAlertsCountAggregations - Error: test`
|
||||
);
|
||||
// logger meta
|
||||
expect(loggerCalls.warn[0][1]?.tags).toEqual(['alerting', 'telemetry-failed']);
|
||||
expect(loggerCalls.warn[0][1]?.error?.stack_trace).toBeDefined();
|
||||
|
||||
expect(telemetry).toEqual({
|
||||
hasErrors: true,
|
||||
|
@ -135,4 +110,56 @@ describe('kibana index telemetry', () => {
|
|||
count_alerts_by_rule_type: {},
|
||||
});
|
||||
});
|
||||
|
||||
it('should return empty results and log debug log if query throws search_phase_execution_exception error', async () => {
|
||||
esClient.search.mockRejectedValueOnce(
|
||||
new errors.ResponseError({
|
||||
warnings: [],
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
meta: {} as any,
|
||||
body: {
|
||||
error: {
|
||||
root_cause: [],
|
||||
type: 'search_phase_execution_exception',
|
||||
reason: 'no_shard_available_action_exception',
|
||||
phase: 'fetch',
|
||||
grouped: true,
|
||||
failed_shards: [],
|
||||
caused_by: {
|
||||
type: 'no_shard_available_action_exception',
|
||||
reason: 'This is the nested reason',
|
||||
},
|
||||
},
|
||||
},
|
||||
statusCode: 503,
|
||||
headers: {},
|
||||
})
|
||||
);
|
||||
|
||||
const telemetry = await getTotalAlertsCountAggregations({ esClient, logger });
|
||||
|
||||
expect(esClient.search).toHaveBeenCalledTimes(1);
|
||||
|
||||
const loggerCalls = loggingSystemMock.collect(logger);
|
||||
expect(loggerCalls.debug).toHaveLength(2);
|
||||
expect(loggerCalls.debug[0][0]).toEqual(
|
||||
`query for getTotalAlertsCountAggregations - {\"index\":\".alerts-*\",\"size\":0,\"body\":{\"query\":{\"match_all\":{}},\"aggs\":{\"by_rule_type_id\":{\"terms\":{\"field\":\"kibana.alert.rule.rule_type_id\",\"size\":33}}}}}`
|
||||
);
|
||||
expect(loggerCalls.debug[1][0]).toMatchInlineSnapshot(`
|
||||
"Error executing alerting telemetry task: getTotalAlertsCountAggregations - ResponseError: search_phase_execution_exception
|
||||
Caused by:
|
||||
no_shard_available_action_exception: This is the nested reason"
|
||||
`);
|
||||
// logger meta
|
||||
expect(loggerCalls.debug[1][1]?.tags).toEqual(['alerting', 'telemetry-failed']);
|
||||
expect(loggerCalls.debug[1][1]?.error?.stack_trace).toBeDefined();
|
||||
expect(loggerCalls.warn).toHaveLength(0);
|
||||
|
||||
expect(telemetry).toEqual({
|
||||
hasErrors: true,
|
||||
errorMessage: `no_shard_available_action_exception`,
|
||||
count_alerts_total: 0,
|
||||
count_alerts_by_rule_type: {},
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
@@ -14,6 +14,7 @@ import { ElasticsearchClient, Logger } from '@kbn/core/server';
import { NUM_ALERTING_RULE_TYPES } from '../alerting_usage_collector';
import { parseSimpleRuleTypeBucket } from './parse_simple_rule_type_bucket';
import { AlertingUsage } from '../types';
import { parseAndLogError } from './parse_and_log_error';

interface Opts {
  esClient: ElasticsearchClient;
@@ -69,20 +70,10 @@ export async function getTotalAlertsCountAggregations({
    return {
      hasErrors: false,
      count_alerts_total: totalAlertsCount ?? 0,
      count_alerts_by_rule_type: parseSimpleRuleTypeBucket(aggregations.by_rule_type_id.buckets),
      count_alerts_by_rule_type: parseSimpleRuleTypeBucket(aggregations?.by_rule_type_id?.buckets),
    };
  } catch (err) {
    const errorMessage = err && err.message ? err.message : err.toString();

    logger.warn(
      `Error executing alerting telemetry task: getTotalAlertsCountAggregations - ${JSON.stringify(
        err
      )}`,
      {
        tags: ['alerting', 'telemetry-failed'],
        error: { stack_trace: err.stack },
      }
    );
    const errorMessage = parseAndLogError(err, `getTotalAlertsCountAggregations`, logger);

    return {
      hasErrors: true,
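Besides swapping in `parseAndLogError`, this hunk also optional-chains into the aggregation response (`aggregations?.by_rule_type_id?.buckets`), which addresses the `TypeError: Cannot read properties of undefined (reading 'by_rule_type_id')` stack trace quoted in the summary. A minimal sketch of why that guard matters, using hypothetical types rather than the Kibana ones:

```ts
// Hypothetical shapes, not the Kibana types: when a search partially fails,
// `aggregations` can be undefined, and `aggregations.by_rule_type_id.buckets`
// would throw the TypeError quoted in the PR summary.
interface RuleTypeBucket {
  key: string;
  doc_count: number;
}

interface TelemetrySearchResult {
  aggregations?: {
    by_rule_type_id?: { buckets: RuleTypeBucket[] };
  };
}

// Assumed helper: tolerate a missing bucket list and fall back to empty counts.
function countByRuleTypeId(result: TelemetrySearchResult): Record<string, number> {
  const buckets = result.aggregations?.by_rule_type_id?.buckets ?? [];
  return Object.fromEntries(buckets.map((b) => [b.key, b.doc_count]));
}

// countByRuleTypeId({}) returns {} instead of throwing inside the telemetry task.
```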
@ -6,6 +6,8 @@
|
|||
*/
|
||||
|
||||
import { elasticsearchServiceMock, loggingSystemMock } from '@kbn/core/server/mocks';
|
||||
import { MockedLogger, loggerMock } from '@kbn/logging-mocks';
|
||||
import { errors } from '@elastic/elasticsearch';
|
||||
import {
|
||||
getExecutionsPerDayCount,
|
||||
parseExecutionFailureByRuleType,
|
||||
|
@ -17,11 +19,12 @@ import {
|
|||
|
||||
const elasticsearch = elasticsearchServiceMock.createStart();
|
||||
const esClient = elasticsearch.client.asInternalUser;
|
||||
const logger: ReturnType<typeof loggingSystemMock.createLogger> = loggingSystemMock.createLogger();
|
||||
let logger: MockedLogger;
|
||||
|
||||
describe('event log telemetry', () => {
|
||||
beforeEach(() => {
|
||||
jest.resetAllMocks();
|
||||
logger = loggerMock.create();
|
||||
});
|
||||
|
||||
describe('parseRuleTypeBucket', () => {
|
||||
|
@ -1106,20 +1109,8 @@ describe('event log telemetry', () => {
|
|||
esClient.search.mockResponse({
|
||||
took: 4,
|
||||
timed_out: false,
|
||||
_shards: {
|
||||
total: 1,
|
||||
successful: 1,
|
||||
skipped: 0,
|
||||
failed: 0,
|
||||
},
|
||||
hits: {
|
||||
total: {
|
||||
value: 148,
|
||||
relation: 'eq',
|
||||
},
|
||||
max_score: null,
|
||||
hits: [],
|
||||
},
|
||||
_shards: { total: 1, successful: 1, skipped: 0, failed: 0 },
|
||||
hits: { total: { value: 148, relation: 'eq' }, max_score: null, hits: [] },
|
||||
aggregations: {
|
||||
by_rule_type_id: {
|
||||
doc_count_error_upper_bound: 0,
|
||||
|
@ -1302,11 +1293,7 @@ describe('event log telemetry', () => {
|
|||
},
|
||||
});
|
||||
|
||||
const telemetry = await getExecutionsPerDayCount({
|
||||
esClient,
|
||||
eventLogIndex: 'test',
|
||||
logger,
|
||||
});
|
||||
const telemetry = await getExecutionsPerDayCount({ esClient, eventLogIndex: 'test', logger });
|
||||
|
||||
expect(esClient.search).toHaveBeenCalledTimes(1);
|
||||
|
||||
|
@ -1416,17 +1403,14 @@ describe('event log telemetry', () => {
|
|||
test('should return empty results and log warning if query throws error', async () => {
|
||||
esClient.search.mockRejectedValue(new Error('oh no'));
|
||||
|
||||
const telemetry = await getExecutionsPerDayCount({
|
||||
esClient,
|
||||
eventLogIndex: 'test',
|
||||
logger,
|
||||
});
|
||||
const telemetry = await getExecutionsPerDayCount({ esClient, eventLogIndex: 'test', logger });
|
||||
|
||||
expect(esClient.search).toHaveBeenCalledTimes(1);
|
||||
|
||||
const loggerCall = logger.warn.mock.calls[0][0];
|
||||
const loggerMeta = logger.warn.mock.calls[0][1];
|
||||
expect(loggerCall as string).toMatchInlineSnapshot(
|
||||
`"Error executing alerting telemetry task: getExecutionsPerDayCount - {}"`
|
||||
`"Error executing alerting telemetry task: getExecutionsPerDayCount - Error: oh no"`
|
||||
);
|
||||
expect(loggerMeta?.tags).toEqual(['alerting', 'telemetry-failed']);
|
||||
expect(loggerMeta?.error?.stack_trace).toBeDefined();
|
||||
|
@ -1451,6 +1435,72 @@ describe('event log telemetry', () => {
|
|||
countRulesByExecutionStatus: {},
|
||||
});
|
||||
});
|
||||
|
||||
it('should return empty results and log debug log if query throws search_phase_execution_exception error', async () => {
|
||||
esClient.search.mockRejectedValueOnce(
|
||||
new errors.ResponseError({
|
||||
warnings: [],
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
meta: {} as any,
|
||||
body: {
|
||||
error: {
|
||||
root_cause: [],
|
||||
type: 'search_phase_execution_exception',
|
||||
reason: 'no_shard_available_action_exception',
|
||||
phase: 'fetch',
|
||||
grouped: true,
|
||||
failed_shards: [],
|
||||
caused_by: {
|
||||
type: 'no_shard_available_action_exception',
|
||||
reason: 'This is the nested reason',
|
||||
},
|
||||
},
|
||||
},
|
||||
statusCode: 503,
|
||||
headers: {},
|
||||
})
|
||||
);
|
||||
|
||||
const telemetry = await getExecutionsPerDayCount({ esClient, eventLogIndex: 'test', logger });
|
||||
|
||||
expect(esClient.search).toHaveBeenCalledTimes(1);
|
||||
|
||||
const loggerCalls = loggingSystemMock.collect(logger);
|
||||
expect(loggerCalls.debug).toHaveLength(2);
|
||||
expect(loggerCalls.debug[0][0]).toEqual(
|
||||
`query for getExecutionsPerDayCount - {\"index\":\"test\",\"size\":0,\"body\":{\"query\":{\"bool\":{\"filter\":{\"bool\":{\"must\":[{\"term\":{\"event.action\":\"execute\"}},{\"term\":{\"event.provider\":\"alerting\"}},{\"range\":{\"@timestamp\":{\"gte\":\"now-1d\"}}}]}}}},\"aggs\":{\"avg_execution_time\":{\"avg\":{\"field\":\"event.duration\"}},\"avg_es_search_duration\":{\"avg\":{\"field\":\"kibana.alert.rule.execution.metrics.es_search_duration_ms\"}},\"avg_total_search_duration\":{\"avg\":{\"field\":\"kibana.alert.rule.execution.metrics.total_search_duration_ms\"}},\"percentile_scheduled_actions\":{\"percentiles\":{\"field\":\"kibana.alert.rule.execution.metrics.number_of_generated_actions\",\"percents\":[50,90,99]}},\"percentile_alerts\":{\"percentiles\":{\"field\":\"kibana.alert.rule.execution.metrics.alert_counts.active\",\"percents\":[50,90,99]}},\"execution_failures\":{\"filter\":{\"term\":{\"event.outcome\":\"failure\"}},\"aggs\":{\"by_reason\":{\"terms\":{\"field\":\"event.reason\",\"size\":5}}}},\"by_rule_type_id\":{\"terms\":{\"field\":\"rule.category\",\"size\":33},\"aggs\":{\"avg_execution_time\":{\"avg\":{\"field\":\"event.duration\"}},\"avg_es_search_duration\":{\"avg\":{\"field\":\"kibana.alert.rule.execution.metrics.es_search_duration_ms\"}},\"avg_total_search_duration\":{\"avg\":{\"field\":\"kibana.alert.rule.execution.metrics.total_search_duration_ms\"}},\"percentile_scheduled_actions\":{\"percentiles\":{\"field\":\"kibana.alert.rule.execution.metrics.number_of_generated_actions\",\"percents\":[50,90,99]}},\"percentile_alerts\":{\"percentiles\":{\"field\":\"kibana.alert.rule.execution.metrics.alert_counts.active\",\"percents\":[50,90,99]}},\"execution_failures\":{\"filter\":{\"term\":{\"event.outcome\":\"failure\"}},\"aggs\":{\"by_reason\":{\"terms\":{\"field\":\"event.reason\",\"size\":5}}}}}},\"by_execution_status\":{\"terms\":{\"field\":\"event.outcome\"}}}}}`
|
||||
);
|
||||
expect(loggerCalls.debug[1][0]).toMatchInlineSnapshot(`
|
||||
"Error executing alerting telemetry task: getExecutionsPerDayCount - ResponseError: search_phase_execution_exception
|
||||
Caused by:
|
||||
no_shard_available_action_exception: This is the nested reason"
|
||||
`);
|
||||
// logger meta
|
||||
expect(loggerCalls.debug[1][1]?.tags).toEqual(['alerting', 'telemetry-failed']);
|
||||
expect(loggerCalls.debug[1][1]?.error?.stack_trace).toBeDefined();
|
||||
expect(loggerCalls.warn).toHaveLength(0);
|
||||
|
||||
expect(telemetry).toStrictEqual({
|
||||
hasErrors: true,
|
||||
errorMessage: 'no_shard_available_action_exception',
|
||||
countTotalRuleExecutions: 0,
|
||||
countRuleExecutionsByType: {},
|
||||
countTotalFailedExecutions: 0,
|
||||
countFailedExecutionsByReason: {},
|
||||
countFailedExecutionsByReasonByType: {},
|
||||
avgExecutionTime: 0,
|
||||
avgExecutionTimeByType: {},
|
||||
avgEsSearchDuration: 0,
|
||||
avgEsSearchDurationByType: {},
|
||||
avgTotalSearchDuration: 0,
|
||||
avgTotalSearchDurationByType: {},
|
||||
generatedActionsPercentiles: {},
|
||||
generatedActionsPercentilesByType: {},
|
||||
alertsPercentiles: {},
|
||||
alertsPercentilesByType: {},
|
||||
countRulesByExecutionStatus: {},
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('getExecutionTimeoutsPerDayCount', () => {
|
||||
|
@ -1458,37 +1508,16 @@ describe('event log telemetry', () => {
|
|||
esClient.search.mockResponse({
|
||||
took: 4,
|
||||
timed_out: false,
|
||||
_shards: {
|
||||
total: 1,
|
||||
successful: 1,
|
||||
skipped: 0,
|
||||
failed: 0,
|
||||
},
|
||||
hits: {
|
||||
total: {
|
||||
value: 4,
|
||||
relation: 'eq',
|
||||
},
|
||||
max_score: null,
|
||||
hits: [],
|
||||
},
|
||||
_shards: { total: 1, successful: 1, skipped: 0, failed: 0 },
|
||||
hits: { total: { value: 4, relation: 'eq' }, max_score: null, hits: [] },
|
||||
aggregations: {
|
||||
by_rule_type_id: {
|
||||
doc_count_error_upper_bound: 0,
|
||||
sum_other_doc_count: 0,
|
||||
buckets: [
|
||||
{
|
||||
key: '.index-threshold',
|
||||
doc_count: 2,
|
||||
},
|
||||
{
|
||||
key: 'logs.alert.document.count',
|
||||
doc_count: 1,
|
||||
},
|
||||
{
|
||||
key: 'document.test.',
|
||||
doc_count: 1,
|
||||
},
|
||||
{ key: '.index-threshold', doc_count: 2 },
|
||||
{ key: 'logs.alert.document.count', doc_count: 1 },
|
||||
{ key: 'document.test.', doc_count: 1 },
|
||||
],
|
||||
},
|
||||
},
|
||||
|
@ -1527,7 +1556,7 @@ describe('event log telemetry', () => {
|
|||
const loggerCall = logger.warn.mock.calls[0][0];
|
||||
const loggerMeta = logger.warn.mock.calls[0][1];
|
||||
expect(loggerCall as string).toMatchInlineSnapshot(
|
||||
`"Error executing alerting telemetry task: getExecutionsTimeoutsPerDayCount - {}"`
|
||||
`"Error executing alerting telemetry task: getExecutionsTimeoutsPerDayCount - Error: oh no"`
|
||||
);
|
||||
expect(loggerMeta?.tags).toEqual(['alerting', 'telemetry-failed']);
|
||||
expect(loggerMeta?.error?.stack_trace).toBeDefined();
|
||||
|
@ -1538,5 +1567,61 @@ describe('event log telemetry', () => {
|
|||
hasErrors: true,
|
||||
});
|
||||
});
|
||||
|
||||
it('should return empty results and log debug log if query throws search_phase_execution_exception error', async () => {
|
||||
esClient.search.mockRejectedValueOnce(
|
||||
new errors.ResponseError({
|
||||
warnings: [],
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
meta: {} as any,
|
||||
body: {
|
||||
error: {
|
||||
root_cause: [],
|
||||
type: 'search_phase_execution_exception',
|
||||
reason: 'no_shard_available_action_exception',
|
||||
phase: 'fetch',
|
||||
grouped: true,
|
||||
failed_shards: [],
|
||||
caused_by: {
|
||||
type: 'no_shard_available_action_exception',
|
||||
reason: 'This is the nested reason',
|
||||
},
|
||||
},
|
||||
},
|
||||
statusCode: 503,
|
||||
headers: {},
|
||||
})
|
||||
);
|
||||
|
||||
const telemetry = await getExecutionTimeoutsPerDayCount({
|
||||
esClient,
|
||||
eventLogIndex: 'test',
|
||||
logger,
|
||||
});
|
||||
|
||||
expect(esClient.search).toHaveBeenCalledTimes(1);
|
||||
|
||||
const loggerCalls = loggingSystemMock.collect(logger);
|
||||
expect(loggerCalls.debug).toHaveLength(2);
|
||||
expect(loggerCalls.debug[0][0]).toEqual(
|
||||
`query for getExecutionTimeoutsPerDayCount - {\"index\":\"test\",\"size\":0,\"body\":{\"query\":{\"bool\":{\"filter\":{\"bool\":{\"must\":[{\"term\":{\"event.action\":\"execute-timeout\"}},{\"term\":{\"event.provider\":\"alerting\"}},{\"range\":{\"@timestamp\":{\"gte\":\"now-1d\"}}}]}}}},\"aggs\":{\"by_rule_type_id\":{\"terms\":{\"field\":\"rule.category\",\"size\":33}}}}}`
|
||||
);
|
||||
expect(loggerCalls.debug[1][0]).toMatchInlineSnapshot(`
|
||||
"Error executing alerting telemetry task: getExecutionsTimeoutsPerDayCount - ResponseError: search_phase_execution_exception
|
||||
Caused by:
|
||||
no_shard_available_action_exception: This is the nested reason"
|
||||
`);
|
||||
// logger meta
|
||||
expect(loggerCalls.debug[1][1]?.tags).toEqual(['alerting', 'telemetry-failed']);
|
||||
expect(loggerCalls.debug[1][1]?.error?.stack_trace).toBeDefined();
|
||||
expect(loggerCalls.warn).toHaveLength(0);
|
||||
|
||||
expect(telemetry).toStrictEqual({
|
||||
hasErrors: true,
|
||||
errorMessage: 'no_shard_available_action_exception',
|
||||
countExecutionTimeouts: 0,
|
||||
countExecutionTimeoutsByType: {},
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
@@ -22,6 +22,7 @@ import {
} from '../alerting_usage_collector';
import { replaceDotSymbols } from './replace_dots_with_underscores';
import { parseSimpleRuleTypeBucket } from './parse_simple_rule_type_bucket';
import { parseAndLogError } from './parse_and_log_error';

const Millis2Nanos = 1000 * 1000;
const percentileFieldNameMapping: Record<string, string> = {
@@ -189,14 +190,8 @@ export async function getExecutionsPerDayCount({
      ),
    };
  } catch (err) {
    const errorMessage = err && err.message ? err.message : err.toString();
    logger.warn(
      `Error executing alerting telemetry task: getExecutionsPerDayCount - ${JSON.stringify(err)}`,
      {
        tags: ['alerting', 'telemetry-failed'],
        error: { stack_trace: err.stack },
      }
    );
    const errorMessage = parseAndLogError(err, `getExecutionsPerDayCount`, logger);

    return {
      hasErrors: true,
      errorMessage,
@@ -262,17 +257,8 @@ export async function getExecutionTimeoutsPerDayCount({
      countExecutionTimeoutsByType: parseSimpleRuleTypeBucket(aggregations.by_rule_type_id.buckets),
    };
  } catch (err) {
    const errorMessage = err && err.message ? err.message : err.toString();
    const errorMessage = parseAndLogError(err, `getExecutionsTimeoutsPerDayCount`, logger);

    logger.warn(
      `Error executing alerting telemetry task: getExecutionsTimeoutsPerDayCount - ${JSON.stringify(
        err
      )}`,
      {
        tags: ['alerting', 'telemetry-failed'],
        error: { stack_trace: err.stack },
      }
    );
    return {
      hasErrors: true,
      errorMessage,
File diff suppressed because one or more lines are too long
|
@@ -25,6 +25,7 @@ import { parseSimpleRuleTypeBucket } from './parse_simple_rule_type_bucket';
import { groupRulesBySearchType } from './group_rules_by_search_type';
import { MAINTENANCE_WINDOW_SAVED_OBJECT_TYPE } from '../../../common';
import { MaintenanceWindowAttributes } from '../../data/maintenance_window/types';
import { parseAndLogError } from './parse_and_log_error';

interface Opts {
  esClient: ElasticsearchClient;
@@ -376,15 +377,8 @@ export async function getTotalCountAggregations({
      },
    };
  } catch (err) {
    const errorMessage = err && err.message ? err.message : err.toString();
    const errorMessage = parseAndLogError(err, `getTotalCountAggregations`, logger);

    logger.warn(
      `Error executing alerting telemetry task: getTotalCountAggregations - ${JSON.stringify(err)}`,
      {
        tags: ['alerting', 'telemetry-failed'],
        error: { stack_trace: err.stack },
      }
    );
    return {
      hasErrors: true,
      errorMessage,
@@ -491,14 +485,8 @@ export async function getTotalCountInUse({
      countNamespaces: aggregations.namespaces_count.value ?? 0,
    };
  } catch (err) {
    const errorMessage = err && err.message ? err.message : err.toString();
    logger.warn(
      `Error executing alerting telemetry task: getTotalCountInUse - ${JSON.stringify(err)}`,
      {
        tags: ['alerting', 'telemetry-failed'],
        error: { stack_trace: err.stack },
      }
    );
    const errorMessage = parseAndLogError(err, `getTotalCountInUse`, logger);

    return {
      hasErrors: true,
      errorMessage,
@@ -548,14 +536,8 @@ export async function getMWTelemetry({
      count_mw_with_filter_alert_toggle_on: countMWWithFilterAlertToggleON,
    };
  } catch (err) {
    const errorMessage = err?.message ? err.message : err.toString();
    logger.warn(
      `Error executing alerting telemetry task: getTotalMWCount - ${JSON.stringify(err)}`,
      {
        tags: ['alerting', 'telemetry-failed'],
        error: { stack_trace: err?.stack },
      }
    );
    const errorMessage = parseAndLogError(err, `getTotalMWCount`, logger);

    return {
      hasErrors: true,
      errorMessage,
@ -5,7 +5,9 @@
|
|||
* 2.0.
|
||||
*/
|
||||
|
||||
import { errors } from '@elastic/elasticsearch';
|
||||
import { elasticsearchServiceMock, loggingSystemMock } from '@kbn/core/server/mocks';
|
||||
import { MockedLogger, loggerMock } from '@kbn/logging-mocks';
|
||||
import {
|
||||
getFailedAndUnrecognizedTasksPerDay,
|
||||
parseBucket,
|
||||
|
@ -13,11 +15,12 @@ import {
|
|||
|
||||
const elasticsearch = elasticsearchServiceMock.createStart();
|
||||
const esClient = elasticsearch.client.asInternalUser;
|
||||
const logger: ReturnType<typeof loggingSystemMock.createLogger> = loggingSystemMock.createLogger();
|
||||
let logger: MockedLogger;
|
||||
|
||||
describe('task manager telemetry', () => {
|
||||
beforeEach(() => {
|
||||
jest.resetAllMocks();
|
||||
logger = loggerMock.create();
|
||||
});
|
||||
|
||||
describe('parseBucket', () => {
|
||||
|
@ -145,20 +148,8 @@ describe('task manager telemetry', () => {
|
|||
esClient.search.mockResponse({
|
||||
took: 4,
|
||||
timed_out: false,
|
||||
_shards: {
|
||||
total: 1,
|
||||
successful: 1,
|
||||
skipped: 0,
|
||||
failed: 0,
|
||||
},
|
||||
hits: {
|
||||
total: {
|
||||
value: 40,
|
||||
relation: 'eq',
|
||||
},
|
||||
max_score: null,
|
||||
hits: [],
|
||||
},
|
||||
_shards: { total: 1, successful: 1, skipped: 0, failed: 0 },
|
||||
hits: { total: { value: 40, relation: 'eq' }, max_score: null, hits: [] },
|
||||
aggregations: {
|
||||
by_status: {
|
||||
doc_count_error_upper_bound: 0,
|
||||
|
@ -243,7 +234,7 @@ describe('task manager telemetry', () => {
|
|||
const loggerCall = logger.warn.mock.calls[0][0];
|
||||
const loggerMeta = logger.warn.mock.calls[0][1];
|
||||
expect(loggerCall as string).toMatchInlineSnapshot(
|
||||
`"Error executing alerting telemetry task: getFailedAndUnrecognizedTasksPerDay - {}"`
|
||||
`"Error executing alerting telemetry task: getFailedAndUnrecognizedTasksPerDay - Error: oh no"`
|
||||
);
|
||||
expect(loggerMeta?.tags).toEqual(['alerting', 'telemetry-failed']);
|
||||
expect(loggerMeta?.error?.stack_trace).toBeDefined();
|
||||
|
@ -255,5 +246,62 @@ describe('task manager telemetry', () => {
|
|||
countFailedAndUnrecognizedTasksByStatusByType: {},
|
||||
});
|
||||
});
|
||||
|
||||
test('should return empty results and log debug log if query throws search_phase_execution_exception error', async () => {
|
||||
esClient.search.mockRejectedValueOnce(
|
||||
new errors.ResponseError({
|
||||
warnings: [],
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
meta: {} as any,
|
||||
body: {
|
||||
error: {
|
||||
root_cause: [],
|
||||
type: 'search_phase_execution_exception',
|
||||
reason: 'no_shard_available_action_exception',
|
||||
phase: 'fetch',
|
||||
grouped: true,
|
||||
failed_shards: [],
|
||||
caused_by: {
|
||||
type: 'no_shard_available_action_exception',
|
||||
reason: 'This is the nested reason',
|
||||
},
|
||||
},
|
||||
},
|
||||
statusCode: 503,
|
||||
headers: {},
|
||||
})
|
||||
);
|
||||
|
||||
const telemetry = await getFailedAndUnrecognizedTasksPerDay({
|
||||
esClient,
|
||||
taskManagerIndex: 'test',
|
||||
logger,
|
||||
});
|
||||
|
||||
expect(esClient.search).toHaveBeenCalledTimes(1);
|
||||
|
||||
const loggerCalls = loggingSystemMock.collect(logger);
|
||||
expect(loggerCalls.debug).toHaveLength(2);
|
||||
expect(loggerCalls.debug[0][0]).toEqual(
|
||||
`query for getFailedAndUnrecognizedTasksPerDay - {\"index\":\"test\",\"size\":0,\"body\":{\"query\":{\"bool\":{\"must\":[{\"bool\":{\"should\":[{\"term\":{\"task.status\":\"unrecognized\"}},{\"term\":{\"task.status\":\"failed\"}}]}},{\"wildcard\":{\"task.taskType\":{\"value\":\"alerting:*\"}}},{\"range\":{\"task.runAt\":{\"gte\":\"now-1d\"}}}]}},\"aggs\":{\"by_status\":{\"terms\":{\"field\":\"task.status\",\"size\":10},\"aggs\":{\"by_task_type\":{\"terms\":{\"field\":\"task.taskType\",\"size\":33}}}}}}}`
|
||||
);
|
||||
expect(loggerCalls.debug[1][0]).toMatchInlineSnapshot(`
|
||||
"Error executing alerting telemetry task: getFailedAndUnrecognizedTasksPerDay - ResponseError: search_phase_execution_exception
|
||||
Caused by:
|
||||
no_shard_available_action_exception: This is the nested reason"
|
||||
`);
|
||||
// logger meta
|
||||
expect(loggerCalls.debug[1][1]?.tags).toEqual(['alerting', 'telemetry-failed']);
|
||||
expect(loggerCalls.debug[1][1]?.error?.stack_trace).toBeDefined();
|
||||
expect(loggerCalls.warn).toHaveLength(0);
|
||||
|
||||
expect(telemetry).toStrictEqual({
|
||||
errorMessage: 'no_shard_available_action_exception',
|
||||
hasErrors: true,
|
||||
countFailedAndUnrecognizedTasks: 0,
|
||||
countFailedAndUnrecognizedTasksByStatus: {},
|
||||
countFailedAndUnrecognizedTasksByStatusByType: {},
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
@@ -14,6 +14,7 @@ import type {
import { ElasticsearchClient, Logger } from '@kbn/core/server';
import { replaceDotSymbols } from './replace_dots_with_underscores';
import { NUM_ALERTING_RULE_TYPES } from '../alerting_usage_collector';
import { parseAndLogError } from './parse_and_log_error';

interface Opts {
  esClient: ElasticsearchClient;
@@ -122,16 +123,8 @@ export async function getFailedAndUnrecognizedTasksPerDay({
      countFailedAndUnrecognizedTasks: totalFailedAndUnrecognizedTasks ?? 0,
    };
  } catch (err) {
    const errorMessage = err && err.message ? err.message : err.toString();
    logger.warn(
      `Error executing alerting telemetry task: getFailedAndUnrecognizedTasksPerDay - ${JSON.stringify(
        err
      )}`,
      {
        tags: ['alerting', 'telemetry-failed'],
        error: { stack_trace: err.stack },
      }
    );
    const errorMessage = parseAndLogError(err, `getFailedAndUnrecognizedTasksPerDay`, logger);

    return {
      hasErrors: true,
      errorMessage,
@@ -0,0 +1,34 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { Logger } from '@kbn/core/server';

export function parseAndLogError(err: Error, errType: string, logger: Logger): string {
  const errorMessage = err && err.message ? err.message : err.toString();
  let returnedErrorMessage = errorMessage;

  const errorStr = JSON.stringify(err);
  const logMessage = `Error executing alerting telemetry task: ${errType} - ${err}`;
  const logOptions = {
    tags: ['alerting', 'telemetry-failed'],
    error: { stack_trace: err.stack },
  };

  // If error string contains "no_shard_available_action_exception", debug log it
  if (errorStr.includes('no_shard_available_action_exception')) {
    // the no_shard_available_action_exception can be wordy and the error message returned from this function
    // gets stored in the task state so lets simplify
    returnedErrorMessage = 'no_shard_available_action_exception';
    if (logger.isLevelEnabled('debug')) {
      logger.debug(logMessage, logOptions);
    }
  } else {
    logger.warn(logMessage, logOptions);
  }

  return returnedErrorMessage;
}
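A unit test for this helper itself is not part of the diff (it is exercised through the collector tests above); if one were added, a sketch along these lines, reusing the same mocks the tests in this commit already rely on, would cover both branches:

```ts
import { errors } from '@elastic/elasticsearch';
import { loggerMock } from '@kbn/logging-mocks';
import { parseAndLogError } from './parse_and_log_error';

describe('parseAndLogError (sketch)', () => {
  test('logs unknown errors at warn and returns the original message', () => {
    const logger = loggerMock.create();
    const message = parseAndLogError(new Error('oh no'), 'getTotalCountAggregations', logger);

    expect(message).toBe('oh no');
    expect(logger.warn).toHaveBeenCalledTimes(1);
    expect(logger.debug).not.toHaveBeenCalled();
  });

  test('logs shard-availability errors at debug and shortens the message', () => {
    const logger = loggerMock.create();
    // Make sure the debug-level guard in the helper passes.
    logger.isLevelEnabled.mockReturnValue(true);
    const err = new errors.ResponseError({
      warnings: [],
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      meta: {} as any,
      statusCode: 503,
      headers: {},
      body: {
        error: {
          type: 'search_phase_execution_exception',
          reason: 'no_shard_available_action_exception',
        },
      },
    });

    const message = parseAndLogError(err, 'getTotalCountAggregations', logger);

    expect(message).toBe('no_shard_available_action_exception');
    expect(logger.debug).toHaveBeenCalledTimes(1);
    expect(logger.warn).not.toHaveBeenCalled();
  });
});
```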