mirror of
https://github.com/elastic/kibana.git
synced 2025-04-23 01:13:23 -04:00
[Response Ops] Log stack traces in alerting/actions/task executors on error (#133931)
* Showing stack track for alerting task runner * Adding stack traces for action and task running * wip * Updating unit tests * wip * Dont return stack trace in action result * Updating unit tests * Updating functional tests * Updating functional tests * Separate log for error * Separate log for error * Moving error log * Trying out putting stack trace in meta * two logs and tags * Adding tags to the error logging Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com>
This commit is contained in:
parent
12b56ab525
commit
8da4cb29a7
8 changed files with 66 additions and 32 deletions
|
@ -50,6 +50,10 @@ export interface ActionTypeExecutorResult<Data> {
|
|||
retry?: null | boolean | Date;
|
||||
}
|
||||
|
||||
export type ActionTypeExecutorRawResult<Data> = ActionTypeExecutorResult<Data> & {
|
||||
error?: Error;
|
||||
};
|
||||
|
||||
export function isActionTypeExecutorResult(
|
||||
result: unknown
|
||||
): result is ActionTypeExecutorResult<unknown> {
|
||||
|
|
|
@ -516,13 +516,19 @@ test('logs a warning when alert executor has an error', async () => {
|
|||
);
|
||||
});
|
||||
|
||||
test('logs a warning when alert executor throws an error', async () => {
|
||||
test('logs a warning and error when alert executor throws an error', async () => {
|
||||
const executorMock = setupActionExecutorMock();
|
||||
executorMock.mockRejectedValue(new Error('this action execution is intended to fail'));
|
||||
const err = new Error('this action execution is intended to fail');
|
||||
err.stack = 'foo error\n stack 1\n stack 2\n stack 3';
|
||||
executorMock.mockRejectedValue(err);
|
||||
await actionExecutor.execute(executeParams);
|
||||
expect(loggerMock.warn).toBeCalledWith(
|
||||
'action execution failure: test:1: action-1: an error occurred while running the action: this action execution is intended to fail'
|
||||
);
|
||||
expect(loggerMock.error).toBeCalledWith(err, {
|
||||
error: { stack_trace: 'foo error\n stack 1\n stack 2\n stack 3' },
|
||||
tags: ['test', '1', 'action-run-failed'],
|
||||
});
|
||||
});
|
||||
|
||||
test('logs a warning when alert executor returns invalid status', async () => {
|
||||
|
|
|
@ -21,6 +21,7 @@ import {
|
|||
import {
|
||||
ActionType,
|
||||
ActionTypeExecutorResult,
|
||||
ActionTypeExecutorRawResult,
|
||||
ActionTypeRegistryContract,
|
||||
GetServicesFunction,
|
||||
PreConfiguredAction,
|
||||
|
@ -203,7 +204,7 @@ export class ActionExecutor {
|
|||
|
||||
eventLogger.logEvent(startEvent);
|
||||
|
||||
let rawResult: ActionTypeExecutorResult<unknown>;
|
||||
let rawResult: ActionTypeExecutorRawResult<unknown>;
|
||||
try {
|
||||
const { validatedParams, validatedConfig, validatedSecrets } = validateAction({
|
||||
actionId,
|
||||
|
@ -231,6 +232,7 @@ export class ActionExecutor {
|
|||
status: 'error',
|
||||
message: 'an error occurred while running the action',
|
||||
serviceMessage: err.message,
|
||||
error: err,
|
||||
retry: false,
|
||||
};
|
||||
}
|
||||
|
@ -256,6 +258,12 @@ export class ActionExecutor {
|
|||
event.message = `action execution failure: ${actionLabel}`;
|
||||
event.error = event.error || {};
|
||||
event.error.message = actionErrorToMessage(result);
|
||||
if (result.error) {
|
||||
logger.error(result.error, {
|
||||
tags: [actionTypeId, actionId, 'action-run-failed'],
|
||||
error: { stack_trace: result.error.stack },
|
||||
});
|
||||
}
|
||||
logger.warn(`action execution failure: ${actionLabel}: ${event.error.message}`);
|
||||
} else {
|
||||
span?.setOutcome('failure');
|
||||
|
@ -269,7 +277,8 @@ export class ActionExecutor {
|
|||
}
|
||||
|
||||
eventLogger.logEvent(event);
|
||||
return result;
|
||||
const { error, ...resultWithoutError } = result;
|
||||
return resultWithoutError;
|
||||
}
|
||||
);
|
||||
}
|
||||
|
@ -393,7 +402,7 @@ async function getActionInfoInternal(
|
|||
};
|
||||
}
|
||||
|
||||
function actionErrorToMessage(result: ActionTypeExecutorResult<unknown>): string {
|
||||
function actionErrorToMessage(result: ActionTypeExecutorRawResult<unknown>): string {
|
||||
let message = result.message || 'unknown error running action';
|
||||
|
||||
if (result.serviceMessage) {
|
||||
|
|
|
@ -22,7 +22,7 @@ import { ActionTypeExecutorResult } from '../common';
|
|||
import { TaskInfo } from './lib/action_executor';
|
||||
import { ConnectorTokenClient } from './builtin_action_types/lib/connector_token_client';
|
||||
|
||||
export type { ActionTypeExecutorResult } from '../common';
|
||||
export type { ActionTypeExecutorResult, ActionTypeExecutorRawResult } from '../common';
|
||||
export type { GetFieldsByIssueTypeResponse as JiraGetFieldsResponse } from './builtin_action_types/jira/types';
|
||||
export type { GetCommonFieldsResponse as ServiceNowGetFieldsResponse } from './builtin_action_types/servicenow/types';
|
||||
export type { GetCommonFieldsResponse as ResilientGetFieldsResponse } from './builtin_action_types/resilient/types';
|
||||
|
|
|
@ -87,6 +87,7 @@ jest.mock('../lib/wrap_scoped_cluster_client', () => ({
|
|||
jest.mock('../lib/alerting_event_logger/alerting_event_logger');
|
||||
|
||||
let fakeTimer: sinon.SinonFakeTimers;
|
||||
const logger: ReturnType<typeof loggingSystemMock.createLogger> = loggingSystemMock.createLogger();
|
||||
|
||||
const mockUsageCountersSetup = usageCountersServiceMock.createSetupContract();
|
||||
const mockUsageCounter = mockUsageCountersSetup.createUsageCounter('test');
|
||||
|
@ -139,7 +140,7 @@ describe('Task Runner', () => {
|
|||
actionsPlugin: actionsMock.createStart(),
|
||||
getRulesClientWithRequest: jest.fn().mockReturnValue(rulesClient),
|
||||
encryptedSavedObjectsClient,
|
||||
logger: loggingSystemMock.create().get(),
|
||||
logger,
|
||||
executionContext: executionContextServiceMock.createInternalStartContract(),
|
||||
spaceIdToNamespace: jest.fn().mockReturnValue(undefined),
|
||||
basePathService: httpServiceMock.createBasePath(),
|
||||
|
@ -258,7 +259,6 @@ describe('Task Runner', () => {
|
|||
expect(call.services.scopedClusterClient).toBeTruthy();
|
||||
expect(call.services).toBeTruthy();
|
||||
|
||||
const logger = taskRunnerFactoryInitializerParams.logger;
|
||||
expect(logger.debug).toHaveBeenCalledTimes(4);
|
||||
expect(logger.debug).nthCalledWith(1, 'executing rule test:1 at 1970-01-01T00:00:00.000Z');
|
||||
expect(logger.debug).nthCalledWith(
|
||||
|
@ -331,7 +331,6 @@ describe('Task Runner', () => {
|
|||
expect(enqueueFunction).toHaveBeenCalledTimes(1);
|
||||
expect(enqueueFunction).toHaveBeenCalledWith(generateEnqueueFunctionInput());
|
||||
|
||||
const logger = customTaskRunnerFactoryInitializerParams.logger;
|
||||
expect(logger.debug).toHaveBeenCalledTimes(5);
|
||||
expect(logger.debug).nthCalledWith(1, 'executing rule test:1 at 1970-01-01T00:00:00.000Z');
|
||||
expect(logger.debug).nthCalledWith(
|
||||
|
@ -415,7 +414,6 @@ describe('Task Runner', () => {
|
|||
await taskRunner.run();
|
||||
expect(actionsClient.ephemeralEnqueuedExecution).toHaveBeenCalledTimes(0);
|
||||
|
||||
const logger = taskRunnerFactoryInitializerParams.logger;
|
||||
expect(logger.debug).toHaveBeenCalledTimes(6);
|
||||
expect(logger.debug).nthCalledWith(1, 'executing rule test:1 at 1970-01-01T00:00:00.000Z');
|
||||
expect(logger.debug).nthCalledWith(
|
||||
|
@ -541,7 +539,6 @@ describe('Task Runner', () => {
|
|||
expect(actionsClient.enqueueExecution).toHaveBeenCalledTimes(expectedExecutions);
|
||||
expect(actionsClient.ephemeralEnqueuedExecution).toHaveBeenCalledTimes(0);
|
||||
|
||||
const logger = taskRunnerFactoryInitializerParams.logger;
|
||||
const expectedMessage = `no scheduling of actions for rule test:1: '${RULE_NAME}': rule is snoozed.`;
|
||||
if (expectedExecutions) {
|
||||
expect(logger.debug).not.toHaveBeenCalledWith(expectedMessage);
|
||||
|
@ -591,7 +588,6 @@ describe('Task Runner', () => {
|
|||
await taskRunner.run();
|
||||
expect(enqueueFunction).toHaveBeenCalledTimes(1);
|
||||
|
||||
const logger = customTaskRunnerFactoryInitializerParams.logger;
|
||||
expect(logger.debug).toHaveBeenCalledTimes(6);
|
||||
expect(logger.debug).nthCalledWith(1, 'executing rule test:1 at 1970-01-01T00:00:00.000Z');
|
||||
expect(logger.debug).nthCalledWith(
|
||||
|
@ -671,7 +667,6 @@ describe('Task Runner', () => {
|
|||
await taskRunner.run();
|
||||
// expect(enqueueFunction).toHaveBeenCalledTimes(1);
|
||||
|
||||
const logger = customTaskRunnerFactoryInitializerParams.logger;
|
||||
// expect(logger.debug).toHaveBeenCalledTimes(5);
|
||||
expect(logger.debug).nthCalledWith(
|
||||
3,
|
||||
|
@ -716,7 +711,6 @@ describe('Task Runner', () => {
|
|||
encryptedSavedObjectsClient.getDecryptedAsInternalUser.mockResolvedValue(SAVED_OBJECT);
|
||||
await taskRunner.run();
|
||||
expect(enqueueFunction).toHaveBeenCalledTimes(1);
|
||||
const logger = customTaskRunnerFactoryInitializerParams.logger;
|
||||
expect(logger.debug).toHaveBeenCalledTimes(6);
|
||||
expect(logger.debug).nthCalledWith(
|
||||
3,
|
||||
|
@ -1099,7 +1093,6 @@ describe('Task Runner', () => {
|
|||
generateAlertInstance({ id: 1, duration: MOCK_DURATION, start: DATE_1969 })
|
||||
);
|
||||
|
||||
const logger = customTaskRunnerFactoryInitializerParams.logger;
|
||||
expect(logger.debug).toHaveBeenCalledTimes(6);
|
||||
expect(logger.debug).nthCalledWith(1, 'executing rule test:1 at 1970-01-01T00:00:00.000Z');
|
||||
expect(logger.debug).nthCalledWith(
|
||||
|
@ -1210,7 +1203,6 @@ describe('Task Runner', () => {
|
|||
const runnerResult = await taskRunner.run();
|
||||
expect(runnerResult.state.alertInstances).toEqual(generateAlertInstance());
|
||||
|
||||
const logger = customTaskRunnerFactoryInitializerParams.logger;
|
||||
expect(logger.debug).toHaveBeenCalledWith(
|
||||
`rule test:1: '${RULE_NAME}' has 1 active alerts: [{\"instanceId\":\"1\",\"actionGroup\":\"default\"}]`
|
||||
);
|
||||
|
@ -1455,9 +1447,13 @@ describe('Task Runner', () => {
|
|||
encryptedSavedObjectsClient.getDecryptedAsInternalUser.mockResolvedValueOnce(SAVED_OBJECT);
|
||||
const runnerResult = await taskRunner.run();
|
||||
expect(runnerResult).toEqual(generateRunnerResult({ successRatio: 0 }));
|
||||
expect(taskRunnerFactoryInitializerParams.logger.error).toHaveBeenCalledWith(
|
||||
`Executing Rule foo:test:1 has resulted in Error: params invalid: [param1]: expected value of type [string] but got [undefined]`
|
||||
const loggerCall = logger.error.mock.calls[0][0];
|
||||
const loggerMeta = logger.error.mock.calls[0][1];
|
||||
expect(loggerCall as string).toMatchInlineSnapshot(
|
||||
`"Executing Rule foo:test:1 has resulted in Error: params invalid: [param1]: expected value of type [string] but got [undefined]"`
|
||||
);
|
||||
expect(loggerMeta?.tags).toEqual(['test', '1', 'rule-run-failed']);
|
||||
expect(loggerMeta?.error?.stack_trace).toBeDefined();
|
||||
expect(mockUsageCounter.incrementCounter).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
|
@ -1583,6 +1579,12 @@ describe('Task Runner', () => {
|
|||
});
|
||||
|
||||
expect(mockUsageCounter.incrementCounter).not.toHaveBeenCalled();
|
||||
|
||||
const loggerCall = logger.error.mock.calls[0][0];
|
||||
const loggerMeta = logger.error.mock.calls[0][1];
|
||||
expect(loggerCall as string).toMatchInlineSnapshot(`[Error: GENERIC ERROR MESSAGE]`);
|
||||
expect(loggerMeta?.tags).toEqual(['test', '1', 'rule-run-failed']);
|
||||
expect(loggerMeta?.error?.stack_trace).toBeDefined();
|
||||
});
|
||||
|
||||
test('recovers gracefully when the Alert Task Runner throws an exception when fetching the encrypted attributes', async () => {
|
||||
|
@ -1789,12 +1791,13 @@ describe('Task Runner', () => {
|
|||
|
||||
encryptedSavedObjectsClient.getDecryptedAsInternalUser.mockResolvedValue(SAVED_OBJECT);
|
||||
|
||||
const logger = taskRunnerFactoryInitializerParams.logger;
|
||||
return taskRunner.run().catch((ex) => {
|
||||
expect(ex.toString()).toEqual(`Error: Saved object [alert/1] not found`);
|
||||
expect(logger.debug).toHaveBeenCalledWith(
|
||||
`Executing Rule foo:test:1 has resulted in Error: Saved object [alert/1] not found`
|
||||
const executeRuleDebugLogger = logger.debug.mock.calls[3][0];
|
||||
expect(executeRuleDebugLogger as string).toMatchInlineSnapshot(
|
||||
`"Executing Rule foo:test:1 has resulted in Error: Saved object [alert/1] not found"`
|
||||
);
|
||||
expect(logger.error).not.toHaveBeenCalled();
|
||||
expect(logger.warn).toHaveBeenCalledTimes(1);
|
||||
expect(logger.warn).nthCalledWith(
|
||||
1,
|
||||
|
@ -1870,12 +1873,13 @@ describe('Task Runner', () => {
|
|||
|
||||
encryptedSavedObjectsClient.getDecryptedAsInternalUser.mockResolvedValue(SAVED_OBJECT);
|
||||
|
||||
const logger = taskRunnerFactoryInitializerParams.logger;
|
||||
return taskRunner.run().catch((ex) => {
|
||||
expect(ex.toString()).toEqual(`Error: Saved object [alert/1] not found`);
|
||||
expect(logger.debug).toHaveBeenCalledWith(
|
||||
`Executing Rule test space:test:1 has resulted in Error: Saved object [alert/1] not found`
|
||||
const ruleExecuteDebugLog = logger.debug.mock.calls[3][0];
|
||||
expect(ruleExecuteDebugLog as string).toMatchInlineSnapshot(
|
||||
`"Executing Rule test space:test:1 has resulted in Error: Saved object [alert/1] not found"`
|
||||
);
|
||||
expect(logger.error).not.toHaveBeenCalled();
|
||||
expect(logger.warn).toHaveBeenCalledTimes(1);
|
||||
expect(logger.warn).nthCalledWith(
|
||||
1,
|
||||
|
@ -2325,7 +2329,6 @@ describe('Task Runner', () => {
|
|||
expect(call.services.scopedClusterClient).toBeTruthy();
|
||||
expect(call.services).toBeTruthy();
|
||||
|
||||
const logger = taskRunnerFactoryInitializerParams.logger;
|
||||
expect(logger.debug).toHaveBeenCalledTimes(4);
|
||||
expect(logger.debug).nthCalledWith(1, 'executing rule test:1 at 1970-01-01T00:00:00.000Z');
|
||||
expect(logger.debug).nthCalledWith(
|
||||
|
@ -2584,7 +2587,6 @@ describe('Task Runner', () => {
|
|||
})
|
||||
);
|
||||
|
||||
const logger = taskRunnerFactoryInitializerParams.logger;
|
||||
expect(logger.debug).toHaveBeenCalledTimes(6);
|
||||
|
||||
expect(logger.debug).nthCalledWith(
|
||||
|
@ -2750,7 +2752,6 @@ describe('Task Runner', () => {
|
|||
})
|
||||
);
|
||||
|
||||
const logger = taskRunnerFactoryInitializerParams.logger;
|
||||
expect(logger.debug).toHaveBeenCalledTimes(6);
|
||||
|
||||
expect(logger.debug).nthCalledWith(
|
||||
|
|
|
@ -397,7 +397,10 @@ export class TaskRunner<
|
|||
`rule execution failure: ${ruleLabel}`,
|
||||
err.message
|
||||
);
|
||||
|
||||
this.logger.error(err, {
|
||||
tags: [this.ruleType.id, ruleId, 'rule-run-failed'],
|
||||
error: { stack_trace: err.stack },
|
||||
});
|
||||
throw new ErrorWithReason(RuleExecutionStatusErrorReasons.Execute, err);
|
||||
}
|
||||
|
||||
|
@ -724,7 +727,10 @@ export class TaskRunner<
|
|||
if (isAlertSavedObjectNotFoundError(err, ruleId)) {
|
||||
this.logger.debug(message);
|
||||
} else {
|
||||
this.logger.error(message);
|
||||
this.logger.error(message, {
|
||||
tags: [this.ruleType.id, ruleId, 'rule-run-failed'],
|
||||
error: { stack_trace: err.stack },
|
||||
});
|
||||
}
|
||||
return originalState;
|
||||
}
|
||||
|
|
|
@ -719,8 +719,8 @@ describe('TaskManagerRunner', () => {
|
|||
});
|
||||
expect(mockApmTrans.end).toHaveBeenCalledWith('success');
|
||||
});
|
||||
test('makes calls to APM as expected when task fails', async () => {
|
||||
const { runner } = await readyToRunStageSetup({
|
||||
test('makes calls to APM and logs errors as expected when task fails', async () => {
|
||||
const { runner, logger } = await readyToRunStageSetup({
|
||||
instance: {
|
||||
params: { a: 'b' },
|
||||
state: { hey: 'there' },
|
||||
|
@ -741,6 +741,11 @@ describe('TaskManagerRunner', () => {
|
|||
childOf: 'apmTraceparent',
|
||||
});
|
||||
expect(mockApmTrans.end).toHaveBeenCalledWith('failure');
|
||||
const loggerCall = logger.error.mock.calls[0][0];
|
||||
const loggerMeta = logger.error.mock.calls[0][1];
|
||||
expect(loggerCall as string).toMatchInlineSnapshot(`"Task bar \\"foo\\" failed: Error: rar"`);
|
||||
expect(loggerMeta?.tags).toEqual(['bar', 'foo', 'task-run-failed']);
|
||||
expect(loggerMeta?.error?.stack_trace).toBeDefined();
|
||||
});
|
||||
test('provides execution context on run', async () => {
|
||||
const { runner } = await readyToRunStageSetup({
|
||||
|
|
|
@ -313,7 +313,10 @@ export class TaskManagerRunner implements TaskRunner {
|
|||
if (apmTrans) apmTrans.end('success');
|
||||
return processedResult;
|
||||
} catch (err) {
|
||||
this.logger.error(`Task ${this} failed: ${err}`);
|
||||
this.logger.error(`Task ${this} failed: ${err}`, {
|
||||
tags: [this.taskType, this.instance.task.id, 'task-run-failed'],
|
||||
error: { stack_trace: err.stack },
|
||||
});
|
||||
// in error scenario, we can not get the RunResult
|
||||
// re-use modifiedContext's state, which is correct as of beforeRun
|
||||
const processedResult = await withSpan({ name: 'process result', type: 'task manager' }, () =>
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue