[Response Ops] Log stack traces in alerting/actions/task executors on error (#133931)

* Showing stack track for alerting task runner

* Adding stack traces for action and task running

* wip

* Updating unit tests

* wip

* Dont return stack trace in action result

* Updating unit tests

* Updating functional tests

* Updating functional tests

* Separate log for error

* Separate log for error

* Moving error log

* Trying out putting stack trace in meta

* two logs and tags

* Adding tags to the error logging

Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com>
This commit is contained in:
Ying Mao 2022-06-16 13:57:39 -04:00 committed by GitHub
parent 12b56ab525
commit 8da4cb29a7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 66 additions and 32 deletions

View file

@ -50,6 +50,10 @@ export interface ActionTypeExecutorResult<Data> {
retry?: null | boolean | Date;
}
export type ActionTypeExecutorRawResult<Data> = ActionTypeExecutorResult<Data> & {
error?: Error;
};
export function isActionTypeExecutorResult(
result: unknown
): result is ActionTypeExecutorResult<unknown> {

View file

@ -516,13 +516,19 @@ test('logs a warning when alert executor has an error', async () => {
);
});
test('logs a warning when alert executor throws an error', async () => {
test('logs a warning and error when alert executor throws an error', async () => {
const executorMock = setupActionExecutorMock();
executorMock.mockRejectedValue(new Error('this action execution is intended to fail'));
const err = new Error('this action execution is intended to fail');
err.stack = 'foo error\n stack 1\n stack 2\n stack 3';
executorMock.mockRejectedValue(err);
await actionExecutor.execute(executeParams);
expect(loggerMock.warn).toBeCalledWith(
'action execution failure: test:1: action-1: an error occurred while running the action: this action execution is intended to fail'
);
expect(loggerMock.error).toBeCalledWith(err, {
error: { stack_trace: 'foo error\n stack 1\n stack 2\n stack 3' },
tags: ['test', '1', 'action-run-failed'],
});
});
test('logs a warning when alert executor returns invalid status', async () => {

View file

@ -21,6 +21,7 @@ import {
import {
ActionType,
ActionTypeExecutorResult,
ActionTypeExecutorRawResult,
ActionTypeRegistryContract,
GetServicesFunction,
PreConfiguredAction,
@ -203,7 +204,7 @@ export class ActionExecutor {
eventLogger.logEvent(startEvent);
let rawResult: ActionTypeExecutorResult<unknown>;
let rawResult: ActionTypeExecutorRawResult<unknown>;
try {
const { validatedParams, validatedConfig, validatedSecrets } = validateAction({
actionId,
@ -231,6 +232,7 @@ export class ActionExecutor {
status: 'error',
message: 'an error occurred while running the action',
serviceMessage: err.message,
error: err,
retry: false,
};
}
@ -256,6 +258,12 @@ export class ActionExecutor {
event.message = `action execution failure: ${actionLabel}`;
event.error = event.error || {};
event.error.message = actionErrorToMessage(result);
if (result.error) {
logger.error(result.error, {
tags: [actionTypeId, actionId, 'action-run-failed'],
error: { stack_trace: result.error.stack },
});
}
logger.warn(`action execution failure: ${actionLabel}: ${event.error.message}`);
} else {
span?.setOutcome('failure');
@ -269,7 +277,8 @@ export class ActionExecutor {
}
eventLogger.logEvent(event);
return result;
const { error, ...resultWithoutError } = result;
return resultWithoutError;
}
);
}
@ -393,7 +402,7 @@ async function getActionInfoInternal(
};
}
function actionErrorToMessage(result: ActionTypeExecutorResult<unknown>): string {
function actionErrorToMessage(result: ActionTypeExecutorRawResult<unknown>): string {
let message = result.message || 'unknown error running action';
if (result.serviceMessage) {

View file

@ -22,7 +22,7 @@ import { ActionTypeExecutorResult } from '../common';
import { TaskInfo } from './lib/action_executor';
import { ConnectorTokenClient } from './builtin_action_types/lib/connector_token_client';
export type { ActionTypeExecutorResult } from '../common';
export type { ActionTypeExecutorResult, ActionTypeExecutorRawResult } from '../common';
export type { GetFieldsByIssueTypeResponse as JiraGetFieldsResponse } from './builtin_action_types/jira/types';
export type { GetCommonFieldsResponse as ServiceNowGetFieldsResponse } from './builtin_action_types/servicenow/types';
export type { GetCommonFieldsResponse as ResilientGetFieldsResponse } from './builtin_action_types/resilient/types';

View file

@ -87,6 +87,7 @@ jest.mock('../lib/wrap_scoped_cluster_client', () => ({
jest.mock('../lib/alerting_event_logger/alerting_event_logger');
let fakeTimer: sinon.SinonFakeTimers;
const logger: ReturnType<typeof loggingSystemMock.createLogger> = loggingSystemMock.createLogger();
const mockUsageCountersSetup = usageCountersServiceMock.createSetupContract();
const mockUsageCounter = mockUsageCountersSetup.createUsageCounter('test');
@ -139,7 +140,7 @@ describe('Task Runner', () => {
actionsPlugin: actionsMock.createStart(),
getRulesClientWithRequest: jest.fn().mockReturnValue(rulesClient),
encryptedSavedObjectsClient,
logger: loggingSystemMock.create().get(),
logger,
executionContext: executionContextServiceMock.createInternalStartContract(),
spaceIdToNamespace: jest.fn().mockReturnValue(undefined),
basePathService: httpServiceMock.createBasePath(),
@ -258,7 +259,6 @@ describe('Task Runner', () => {
expect(call.services.scopedClusterClient).toBeTruthy();
expect(call.services).toBeTruthy();
const logger = taskRunnerFactoryInitializerParams.logger;
expect(logger.debug).toHaveBeenCalledTimes(4);
expect(logger.debug).nthCalledWith(1, 'executing rule test:1 at 1970-01-01T00:00:00.000Z');
expect(logger.debug).nthCalledWith(
@ -331,7 +331,6 @@ describe('Task Runner', () => {
expect(enqueueFunction).toHaveBeenCalledTimes(1);
expect(enqueueFunction).toHaveBeenCalledWith(generateEnqueueFunctionInput());
const logger = customTaskRunnerFactoryInitializerParams.logger;
expect(logger.debug).toHaveBeenCalledTimes(5);
expect(logger.debug).nthCalledWith(1, 'executing rule test:1 at 1970-01-01T00:00:00.000Z');
expect(logger.debug).nthCalledWith(
@ -415,7 +414,6 @@ describe('Task Runner', () => {
await taskRunner.run();
expect(actionsClient.ephemeralEnqueuedExecution).toHaveBeenCalledTimes(0);
const logger = taskRunnerFactoryInitializerParams.logger;
expect(logger.debug).toHaveBeenCalledTimes(6);
expect(logger.debug).nthCalledWith(1, 'executing rule test:1 at 1970-01-01T00:00:00.000Z');
expect(logger.debug).nthCalledWith(
@ -541,7 +539,6 @@ describe('Task Runner', () => {
expect(actionsClient.enqueueExecution).toHaveBeenCalledTimes(expectedExecutions);
expect(actionsClient.ephemeralEnqueuedExecution).toHaveBeenCalledTimes(0);
const logger = taskRunnerFactoryInitializerParams.logger;
const expectedMessage = `no scheduling of actions for rule test:1: '${RULE_NAME}': rule is snoozed.`;
if (expectedExecutions) {
expect(logger.debug).not.toHaveBeenCalledWith(expectedMessage);
@ -591,7 +588,6 @@ describe('Task Runner', () => {
await taskRunner.run();
expect(enqueueFunction).toHaveBeenCalledTimes(1);
const logger = customTaskRunnerFactoryInitializerParams.logger;
expect(logger.debug).toHaveBeenCalledTimes(6);
expect(logger.debug).nthCalledWith(1, 'executing rule test:1 at 1970-01-01T00:00:00.000Z');
expect(logger.debug).nthCalledWith(
@ -671,7 +667,6 @@ describe('Task Runner', () => {
await taskRunner.run();
// expect(enqueueFunction).toHaveBeenCalledTimes(1);
const logger = customTaskRunnerFactoryInitializerParams.logger;
// expect(logger.debug).toHaveBeenCalledTimes(5);
expect(logger.debug).nthCalledWith(
3,
@ -716,7 +711,6 @@ describe('Task Runner', () => {
encryptedSavedObjectsClient.getDecryptedAsInternalUser.mockResolvedValue(SAVED_OBJECT);
await taskRunner.run();
expect(enqueueFunction).toHaveBeenCalledTimes(1);
const logger = customTaskRunnerFactoryInitializerParams.logger;
expect(logger.debug).toHaveBeenCalledTimes(6);
expect(logger.debug).nthCalledWith(
3,
@ -1099,7 +1093,6 @@ describe('Task Runner', () => {
generateAlertInstance({ id: 1, duration: MOCK_DURATION, start: DATE_1969 })
);
const logger = customTaskRunnerFactoryInitializerParams.logger;
expect(logger.debug).toHaveBeenCalledTimes(6);
expect(logger.debug).nthCalledWith(1, 'executing rule test:1 at 1970-01-01T00:00:00.000Z');
expect(logger.debug).nthCalledWith(
@ -1210,7 +1203,6 @@ describe('Task Runner', () => {
const runnerResult = await taskRunner.run();
expect(runnerResult.state.alertInstances).toEqual(generateAlertInstance());
const logger = customTaskRunnerFactoryInitializerParams.logger;
expect(logger.debug).toHaveBeenCalledWith(
`rule test:1: '${RULE_NAME}' has 1 active alerts: [{\"instanceId\":\"1\",\"actionGroup\":\"default\"}]`
);
@ -1455,9 +1447,13 @@ describe('Task Runner', () => {
encryptedSavedObjectsClient.getDecryptedAsInternalUser.mockResolvedValueOnce(SAVED_OBJECT);
const runnerResult = await taskRunner.run();
expect(runnerResult).toEqual(generateRunnerResult({ successRatio: 0 }));
expect(taskRunnerFactoryInitializerParams.logger.error).toHaveBeenCalledWith(
`Executing Rule foo:test:1 has resulted in Error: params invalid: [param1]: expected value of type [string] but got [undefined]`
const loggerCall = logger.error.mock.calls[0][0];
const loggerMeta = logger.error.mock.calls[0][1];
expect(loggerCall as string).toMatchInlineSnapshot(
`"Executing Rule foo:test:1 has resulted in Error: params invalid: [param1]: expected value of type [string] but got [undefined]"`
);
expect(loggerMeta?.tags).toEqual(['test', '1', 'rule-run-failed']);
expect(loggerMeta?.error?.stack_trace).toBeDefined();
expect(mockUsageCounter.incrementCounter).not.toHaveBeenCalled();
});
@ -1583,6 +1579,12 @@ describe('Task Runner', () => {
});
expect(mockUsageCounter.incrementCounter).not.toHaveBeenCalled();
const loggerCall = logger.error.mock.calls[0][0];
const loggerMeta = logger.error.mock.calls[0][1];
expect(loggerCall as string).toMatchInlineSnapshot(`[Error: GENERIC ERROR MESSAGE]`);
expect(loggerMeta?.tags).toEqual(['test', '1', 'rule-run-failed']);
expect(loggerMeta?.error?.stack_trace).toBeDefined();
});
test('recovers gracefully when the Alert Task Runner throws an exception when fetching the encrypted attributes', async () => {
@ -1789,12 +1791,13 @@ describe('Task Runner', () => {
encryptedSavedObjectsClient.getDecryptedAsInternalUser.mockResolvedValue(SAVED_OBJECT);
const logger = taskRunnerFactoryInitializerParams.logger;
return taskRunner.run().catch((ex) => {
expect(ex.toString()).toEqual(`Error: Saved object [alert/1] not found`);
expect(logger.debug).toHaveBeenCalledWith(
`Executing Rule foo:test:1 has resulted in Error: Saved object [alert/1] not found`
const executeRuleDebugLogger = logger.debug.mock.calls[3][0];
expect(executeRuleDebugLogger as string).toMatchInlineSnapshot(
`"Executing Rule foo:test:1 has resulted in Error: Saved object [alert/1] not found"`
);
expect(logger.error).not.toHaveBeenCalled();
expect(logger.warn).toHaveBeenCalledTimes(1);
expect(logger.warn).nthCalledWith(
1,
@ -1870,12 +1873,13 @@ describe('Task Runner', () => {
encryptedSavedObjectsClient.getDecryptedAsInternalUser.mockResolvedValue(SAVED_OBJECT);
const logger = taskRunnerFactoryInitializerParams.logger;
return taskRunner.run().catch((ex) => {
expect(ex.toString()).toEqual(`Error: Saved object [alert/1] not found`);
expect(logger.debug).toHaveBeenCalledWith(
`Executing Rule test space:test:1 has resulted in Error: Saved object [alert/1] not found`
const ruleExecuteDebugLog = logger.debug.mock.calls[3][0];
expect(ruleExecuteDebugLog as string).toMatchInlineSnapshot(
`"Executing Rule test space:test:1 has resulted in Error: Saved object [alert/1] not found"`
);
expect(logger.error).not.toHaveBeenCalled();
expect(logger.warn).toHaveBeenCalledTimes(1);
expect(logger.warn).nthCalledWith(
1,
@ -2325,7 +2329,6 @@ describe('Task Runner', () => {
expect(call.services.scopedClusterClient).toBeTruthy();
expect(call.services).toBeTruthy();
const logger = taskRunnerFactoryInitializerParams.logger;
expect(logger.debug).toHaveBeenCalledTimes(4);
expect(logger.debug).nthCalledWith(1, 'executing rule test:1 at 1970-01-01T00:00:00.000Z');
expect(logger.debug).nthCalledWith(
@ -2584,7 +2587,6 @@ describe('Task Runner', () => {
})
);
const logger = taskRunnerFactoryInitializerParams.logger;
expect(logger.debug).toHaveBeenCalledTimes(6);
expect(logger.debug).nthCalledWith(
@ -2750,7 +2752,6 @@ describe('Task Runner', () => {
})
);
const logger = taskRunnerFactoryInitializerParams.logger;
expect(logger.debug).toHaveBeenCalledTimes(6);
expect(logger.debug).nthCalledWith(

View file

@ -397,7 +397,10 @@ export class TaskRunner<
`rule execution failure: ${ruleLabel}`,
err.message
);
this.logger.error(err, {
tags: [this.ruleType.id, ruleId, 'rule-run-failed'],
error: { stack_trace: err.stack },
});
throw new ErrorWithReason(RuleExecutionStatusErrorReasons.Execute, err);
}
@ -724,7 +727,10 @@ export class TaskRunner<
if (isAlertSavedObjectNotFoundError(err, ruleId)) {
this.logger.debug(message);
} else {
this.logger.error(message);
this.logger.error(message, {
tags: [this.ruleType.id, ruleId, 'rule-run-failed'],
error: { stack_trace: err.stack },
});
}
return originalState;
}

View file

@ -719,8 +719,8 @@ describe('TaskManagerRunner', () => {
});
expect(mockApmTrans.end).toHaveBeenCalledWith('success');
});
test('makes calls to APM as expected when task fails', async () => {
const { runner } = await readyToRunStageSetup({
test('makes calls to APM and logs errors as expected when task fails', async () => {
const { runner, logger } = await readyToRunStageSetup({
instance: {
params: { a: 'b' },
state: { hey: 'there' },
@ -741,6 +741,11 @@ describe('TaskManagerRunner', () => {
childOf: 'apmTraceparent',
});
expect(mockApmTrans.end).toHaveBeenCalledWith('failure');
const loggerCall = logger.error.mock.calls[0][0];
const loggerMeta = logger.error.mock.calls[0][1];
expect(loggerCall as string).toMatchInlineSnapshot(`"Task bar \\"foo\\" failed: Error: rar"`);
expect(loggerMeta?.tags).toEqual(['bar', 'foo', 'task-run-failed']);
expect(loggerMeta?.error?.stack_trace).toBeDefined();
});
test('provides execution context on run', async () => {
const { runner } = await readyToRunStageSetup({

View file

@ -313,7 +313,10 @@ export class TaskManagerRunner implements TaskRunner {
if (apmTrans) apmTrans.end('success');
return processedResult;
} catch (err) {
this.logger.error(`Task ${this} failed: ${err}`);
this.logger.error(`Task ${this} failed: ${err}`, {
tags: [this.taskType, this.instance.task.id, 'task-run-failed'],
error: { stack_trace: err.stack },
});
// in error scenario, we can not get the RunResult
// re-use modifiedContext's state, which is correct as of beforeRun
const processedResult = await withSpan({ name: 'process result', type: 'task manager' }, () =>