/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import apm from 'elastic-apm-node';
import { cloneDeep, mapValues, omit, pickBy, set, without } from 'lodash';
import type { Request } from '@hapi/hapi';
import { UsageCounter } from 'src/plugins/usage_collection/server';
import uuid from 'uuid';
import { addSpaceIdToPath } from '../../../spaces/server';
import { KibanaRequest, Logger } from '../../../../../src/core/server';
import { TaskRunnerContext } from './task_runner_factory';
import { ConcreteTaskInstance, throwUnrecoverableError } from '../../../task_manager/server';
import { createExecutionHandler, ExecutionHandler } from './create_execution_handler';
import { Alert as CreatedAlert, createAlertFactory } from '../alert';
import {
  createWrappedScopedClusterClientFactory,
  ElasticsearchError,
  ErrorWithReason,
  executionStatusFromError,
  executionStatusFromState,
  getRecoveredAlerts,
  ruleExecutionStatusToRaw,
  validateRuleTypeParams,
} from '../lib';
import {
  Alert,
  AlertExecutionStatus,
  AlertExecutionStatusErrorReasons,
  IntervalSchedule,
  RawAlertInstance,
  RawRule,
  RawRuleExecutionStatus,
  RuleExecutionRunResult,
  RuleExecutionState,
  RuleMonitoring,
  RuleMonitoringHistory,
  RuleTaskState,
  RuleTypeRegistry,
  SanitizedAlert,
} from '../types';
import { asErr, asOk, map, promiseResult, resolveErr, Resultable } from '../lib/result_type';
import { getExecutionDurationPercentiles, getExecutionSuccessRatio } from '../lib/monitoring';
import { taskInstanceToAlertTaskInstance } from './alert_task_instance';
import { EVENT_LOG_ACTIONS } from '../plugin';
import { IEvent, SAVED_OBJECT_REL_PRIMARY } from '../../../event_log/server';
import { isAlertSavedObjectNotFoundError, isEsUnavailableError } from '../lib/is_alerting_error';
import { partiallyUpdateAlert } from '../saved_objects';
import {
  AlertInstanceContext,
  AlertInstanceState,
  AlertTypeParams,
  AlertTypeState,
  MONITORING_HISTORY_LIMIT,
  parseDuration,
  WithoutReservedActionGroups,
} from '../../common';
import { NormalizedRuleType, UntypedNormalizedRuleType } from '../rule_type_registry';
import { getEsErrorMessage } from '../lib/errors';
import {
  createAlertEventLogRecordObject,
  Event,
} from '../lib/create_alert_event_log_record_object';
import { InMemoryMetrics, IN_MEMORY_METRICS } from '../monitoring';
import {
  ActionsCompletion,
  AlertExecutionStore,
  GenerateNewAndRecoveredAlertEventsParams,
  LogActiveAndRecoveredAlertsParams,
  RuleTaskInstance,
  RuleTaskRunResult,
  ScheduleActionsForRecoveredAlertsParams,
  TrackAlertDurationsParams,
} from './types';

const FALLBACK_RETRY_INTERVAL = '5m';
const CONNECTIVITY_RETRY_INTERVAL = '5m';

// 1,000,000 nanoseconds in 1 millisecond
const Millis2Nanos = 1000 * 1000;

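// Baseline monitoring state, used for rules with no execution history yet and
// as the fallback when a rule's stored monitoring data cannot be loaded.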
export const getDefaultRuleMonitoring = (): RuleMonitoring => ({
  execution: {
    history: [],
    calculated_metrics: {
      success_ratio: 0,
    },
  },
});

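/**
 * Runs a single scheduled execution of a rule: loads and decrypts the rule,
 * invokes the rule type executor, schedules actions for active and recovered
 * alerts, writes event log records, and persists the execution status,
 * monitoring data, and task state for the next run.
 */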
export class TaskRunner<
  Params extends AlertTypeParams,
  ExtractedParams extends AlertTypeParams,
  State extends AlertTypeState,
  InstanceState extends AlertInstanceState,
  InstanceContext extends AlertInstanceContext,
  ActionGroupIds extends string,
  RecoveryActionGroupId extends string
> {
  private context: TaskRunnerContext;
  private logger: Logger;
  private taskInstance: RuleTaskInstance;
  private ruleName: string | null;
  private ruleConsumer: string | null;
  private ruleType: NormalizedRuleType<
    Params,
    ExtractedParams,
    State,
    InstanceState,
    InstanceContext,
    ActionGroupIds,
    RecoveryActionGroupId
  >;
  private readonly executionId: string;
  private readonly ruleTypeRegistry: RuleTypeRegistry;
  private readonly inMemoryMetrics: InMemoryMetrics;
  private usageCounter?: UsageCounter;
  private searchAbortController: AbortController;
  private cancelled: boolean;

  constructor(
    ruleType: NormalizedRuleType<
      Params,
      ExtractedParams,
      State,
      InstanceState,
      InstanceContext,
      ActionGroupIds,
      RecoveryActionGroupId
    >,
    taskInstance: ConcreteTaskInstance,
    context: TaskRunnerContext,
    inMemoryMetrics: InMemoryMetrics
  ) {
    this.context = context;
    this.logger = context.logger;
    this.usageCounter = context.usageCounter;
    this.ruleType = ruleType;
    this.ruleName = null;
    this.ruleConsumer = null;
    this.taskInstance = taskInstanceToAlertTaskInstance(taskInstance);
    this.ruleTypeRegistry = context.ruleTypeRegistry;
    this.searchAbortController = new AbortController();
    this.cancelled = false;
    this.executionId = uuid.v4();
    this.inMemoryMetrics = inMemoryMetrics;
  }

  private async getDecryptedAttributes(
    ruleId: string,
    spaceId: string
  ): Promise<{ apiKey: string | null; enabled: boolean; consumer: string }> {
    const namespace = this.context.spaceIdToNamespace(spaceId);
    // Only fetch encrypted attributes here, we'll create a saved objects client
    // scoped with the API key to fetch the remaining data.
    const {
      attributes: { apiKey, enabled, consumer },
    } = await this.context.encryptedSavedObjectsClient.getDecryptedAsInternalUser<RawRule>(
      'alert',
      ruleId,
      { namespace }
    );

    return { apiKey, enabled, consumer };
  }

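  // Builds a fake KibanaRequest that carries the rule's API key as an
  // authorization header and is pinned to the rule's space, so that saved
  // objects and Elasticsearch clients can be scoped as the rule's creator.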
  private getFakeKibanaRequest(spaceId: string, apiKey: RawRule['apiKey']) {
    const requestHeaders: Record<string, string> = {};

    if (apiKey) {
      requestHeaders.authorization = `ApiKey ${apiKey}`;
    }

    const path = addSpaceIdToPath('/', spaceId);

    const fakeRequest = KibanaRequest.from({
      headers: requestHeaders,
      path: '/',
      route: { settings: {} },
      url: {
        href: '/',
      },
      raw: {
        req: {
          url: '/',
        },
      },
    } as unknown as Request);

    this.context.basePathService.set(fakeRequest, path);

    return fakeRequest;
  }

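  // Creates the handler used to schedule this rule's actions for a given alert
  // and action group; most inputs come from the decrypted rule saved object.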
  private getExecutionHandler(
    ruleId: string,
    ruleName: string,
    tags: string[] | undefined,
    spaceId: string,
    apiKey: RawRule['apiKey'],
    kibanaBaseUrl: string | undefined,
    actions: Alert<Params>['actions'],
    ruleParams: Params,
    request: KibanaRequest
  ) {
    return createExecutionHandler<
      Params,
      ExtractedParams,
      State,
      InstanceState,
      InstanceContext,
      ActionGroupIds,
      RecoveryActionGroupId
    >({
      ruleId,
      ruleName,
      ruleConsumer: this.ruleConsumer!,
      tags,
      executionId: this.executionId,
      logger: this.logger,
      actionsPlugin: this.context.actionsPlugin,
      apiKey,
      actions,
      spaceId,
      ruleType: this.ruleType,
      kibanaBaseUrl,
      eventLogger: this.context.eventLogger,
      request,
      ruleParams,
      supportsEphemeralTasks: this.context.supportsEphemeralTasks,
      maxEphemeralActionsPerRule: this.context.maxEphemeralActionsPerRule,
    });
  }

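  // Partially updates the rule saved object (execution status and/or monitoring).
  // Failures are logged rather than thrown so they never fail the task itself;
  // 404s are ignored since the rule may have been deleted while the task ran.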
  private async updateRuleSavedObject(
    ruleId: string,
    namespace: string | undefined,
    attributes: { executionStatus?: RawRuleExecutionStatus; monitoring?: RuleMonitoring }
  ) {
    const client = this.context.internalSavedObjectsRepository;

    try {
      await partiallyUpdateAlert(client, ruleId, attributes, {
        ignore404: true,
        namespace,
        refresh: false,
      });
    } catch (err) {
      this.logger.error(`error updating rule for ${this.ruleType.id}:${ruleId} ${err.message}`);
    }
  }

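  // A rule is snoozed when all of its alerts are muted or its snooze end time
  // is still in the future.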
  private isRuleSnoozed(rule: SanitizedAlert<Params>): boolean {
    if (rule.muteAll) {
      return true;
    }

    if (rule.snoozeEndTime == null) {
      return false;
    }

    return Date.now() < rule.snoozeEndTime.getTime();
  }

  private shouldLogAndScheduleActionsForAlerts() {
    // if execution hasn't been cancelled, return true
    if (!this.cancelled) {
      return true;
    }

    // if execution has been cancelled, return true if EITHER the alerting config or the rule type indicates to proceed with scheduling actions
    return !this.context.cancelAlertsOnRuleTimeout || !this.ruleType.cancelAlertsOnRuleTimeout;
  }

  private countUsageOfActionExecutionAfterRuleCancellation() {
    if (this.cancelled && this.usageCounter) {
      if (this.context.cancelAlertsOnRuleTimeout && this.ruleType.cancelAlertsOnRuleTimeout) {
        // Increment usage counter for skipped actions
        this.usageCounter.incrementCounter({
          counterName: `alertsSkippedDueToRuleExecutionTimeout_${this.ruleType.id}`,
          incrementBy: 1,
        });
      }
    }
  }

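  // Schedules actions for a single active alert using its scheduled action
  // group/subgroup, recording the scheduling on the alert so that throttling
  // and "onActionGroupChange" checks see it on subsequent runs.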
  private async executeAlert(
    alertId: string,
    alert: CreatedAlert<InstanceState, InstanceContext>,
    executionHandler: ExecutionHandler<ActionGroupIds | RecoveryActionGroupId>,
    alertExecutionStore: AlertExecutionStore
  ) {
    const {
      actionGroup,
      subgroup: actionSubgroup,
      context,
      state,
    } = alert.getScheduledActionOptions()!;
    alert.updateLastScheduledActions(actionGroup, actionSubgroup);
    alert.unscheduleActions();
    return executionHandler({
      actionGroup,
      actionSubgroup,
      context,
      state,
      alertId,
      alertExecutionStore,
    });
  }

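  // Core execution path: rehydrates alerts from the previous task state, runs
  // the rule type executor, determines which alerts are active vs. recovered,
  // logs them to the event log, schedules actions (unless the rule is snoozed
  // or execution was cancelled), and returns the state to persist for the next run.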
  private async executeAlerts(
    fakeRequest: KibanaRequest,
    rule: SanitizedAlert<Params>,
    params: Params,
    executionHandler: ExecutionHandler<ActionGroupIds | RecoveryActionGroupId>,
    spaceId: string,
    event: Event
  ): Promise<RuleExecutionState> {
    const {
      alertTypeId,
      consumer,
      schedule,
      throttle,
      notifyWhen,
      mutedInstanceIds,
      name,
      tags,
      createdBy,
      updatedBy,
      createdAt,
      updatedAt,
      enabled,
      actions,
    } = rule;
    const {
      params: { alertId: ruleId },
      state: { alertInstances: alertRawInstances = {}, alertTypeState = {}, previousStartedAt },
    } = this.taskInstance;
    const namespace = this.context.spaceIdToNamespace(spaceId);
    const ruleType = this.ruleTypeRegistry.get(alertTypeId);

    const alerts = mapValues<
      Record<string, RawAlertInstance>,
      CreatedAlert<InstanceState, InstanceContext>
    >(
      alertRawInstances,
      (rawAlert, alertId) => new CreatedAlert<InstanceState, InstanceContext>(alertId, rawAlert)
    );

    const originalAlerts = cloneDeep(alerts);
    const originalAlertIds = new Set(Object.keys(originalAlerts));

    const eventLogger = this.context.eventLogger;
    const ruleLabel = `${this.ruleType.id}:${ruleId}: '${name}'`;

    const scopedClusterClient = this.context.elasticsearch.client.asScoped(fakeRequest);
    const wrappedScopedClusterClient = createWrappedScopedClusterClientFactory({
      scopedClusterClient,
      rule: {
        name: rule.name,
        alertTypeId: rule.alertTypeId,
        id: rule.id,
        spaceId,
      },
      logger: this.logger,
      abortController: this.searchAbortController,
    });

    let updatedRuleTypeState: void | Record<string, unknown>;
    try {
      const ctx = {
        type: 'alert',
        name: `execute ${rule.alertTypeId}`,
        id: ruleId,
        description: `execute [${rule.alertTypeId}] with name [${name}] in [${
          namespace ?? 'default'
        }] namespace`,
      };

      const savedObjectsClient = this.context.savedObjects.getScopedClient(fakeRequest, {
        includedHiddenTypes: ['alert', 'action'],
      });

      updatedRuleTypeState = await this.context.executionContext.withContext(ctx, () =>
        this.ruleType.executor({
          alertId: ruleId,
          executionId: this.executionId,
          services: {
            savedObjectsClient,
            uiSettingsClient: this.context.uiSettings.asScopedToClient(savedObjectsClient),
            scopedClusterClient: wrappedScopedClusterClient.client(),
            alertFactory: createAlertFactory<
              InstanceState,
              InstanceContext,
              WithoutReservedActionGroups<ActionGroupIds, RecoveryActionGroupId>
            >({
              alerts,
              logger: this.logger,
              canSetRecoveryContext: ruleType.doesSetRecoveryContext ?? false,
            }),
            shouldWriteAlerts: () => this.shouldLogAndScheduleActionsForAlerts(),
            shouldStopExecution: () => this.cancelled,
          },
          params,
          state: alertTypeState as State,
          startedAt: this.taskInstance.startedAt!,
          previousStartedAt: previousStartedAt ? new Date(previousStartedAt) : null,
          spaceId,
          namespace,
          name,
          tags,
          createdBy,
          updatedBy,
          rule: {
            name,
            tags,
            consumer,
            producer: ruleType.producer,
            ruleTypeId: rule.alertTypeId,
            ruleTypeName: ruleType.name,
            enabled,
            schedule,
            actions,
            createdBy,
            updatedBy,
            createdAt,
            updatedAt,
            throttle,
            notifyWhen,
          },
        })
      );
    } catch (err) {
      event.message = `rule execution failure: ${ruleLabel}`;
      event.error = event.error || {};
      event.error.message = err.message;
      event.event = event.event || {};
      event.event.outcome = 'failure';

      throw new ErrorWithReason(AlertExecutionStatusErrorReasons.Execute, err);
    }

    event.message = `rule executed: ${ruleLabel}`;
    event.event = event.event || {};
    event.event.outcome = 'success';
    event.rule = {
      ...event.rule,
      name: rule.name,
    };

    const searchMetrics = wrappedScopedClusterClient.getMetrics();

    // Clean up alerts that are no longer scheduling actions to avoid overpopulating the alertInstances object
    const alertsWithScheduledActions = pickBy(
      alerts,
      (alert: CreatedAlert<InstanceState, InstanceContext>) => alert.hasScheduledActions()
    );

    const recoveredAlerts = getRecoveredAlerts(alerts, originalAlertIds);

    logActiveAndRecoveredAlerts({
      logger: this.logger,
      activeAlerts: alertsWithScheduledActions,
      recoveredAlerts,
      ruleLabel,
      canSetRecoveryContext: ruleType.doesSetRecoveryContext ?? false,
    });

    trackAlertDurations({
      originalAlerts,
      currentAlerts: alertsWithScheduledActions,
      recoveredAlerts,
    });

    if (this.shouldLogAndScheduleActionsForAlerts()) {
      generateNewAndRecoveredAlertEvents({
        eventLogger,
        executionId: this.executionId,
        originalAlerts,
        currentAlerts: alertsWithScheduledActions,
        recoveredAlerts,
        ruleId,
        ruleLabel,
        namespace,
        ruleType,
        rule,
        spaceId,
      });
    }

    const alertExecutionStore: AlertExecutionStore = {
      numberOfTriggeredActions: 0,
      numberOfScheduledActions: 0,
      triggeredActionsStatus: ActionsCompletion.COMPLETE,
    };

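    // Actions are only scheduled when the rule is not snoozed and execution
    // has not been cancelled (or the alerting config / rule type opts out of
    // cancelling actions on timeout).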
    const ruleIsSnoozed = this.isRuleSnoozed(rule);
    if (!ruleIsSnoozed && this.shouldLogAndScheduleActionsForAlerts()) {
      const mutedAlertIdsSet = new Set(mutedInstanceIds);

      const alertsWithExecutableActions = Object.entries(alertsWithScheduledActions).filter(
        ([alertName, alert]: [string, CreatedAlert<InstanceState, InstanceContext>]) => {
          const throttled = alert.isThrottled(throttle);
          const muted = mutedAlertIdsSet.has(alertName);
          let shouldExecuteAction = true;

          if (throttled || muted) {
            shouldExecuteAction = false;
            this.logger.debug(
              `skipping scheduling of actions for '${alertName}' in rule ${ruleLabel}: rule is ${
                muted ? 'muted' : 'throttled'
              }`
            );
          } else if (
            notifyWhen === 'onActionGroupChange' &&
            !alert.scheduledActionGroupOrSubgroupHasChanged()
          ) {
            shouldExecuteAction = false;
            this.logger.debug(
              `skipping scheduling of actions for '${alertName}' in rule ${ruleLabel}: alert is active but action group has not changed`
            );
          }

          return shouldExecuteAction;
        }
      );

      await Promise.all(
        alertsWithExecutableActions.map(
          ([alertId, alert]: [string, CreatedAlert<InstanceState, InstanceContext>]) =>
            this.executeAlert(alertId, alert, executionHandler, alertExecutionStore)
        )
      );

      await scheduleActionsForRecoveredAlerts<
        InstanceState,
        InstanceContext,
        RecoveryActionGroupId
      >({
        recoveryActionGroup: this.ruleType.recoveryActionGroup,
        recoveredAlerts,
        executionHandler,
        mutedAlertIdsSet,
        logger: this.logger,
        ruleLabel,
        alertExecutionStore,
      });
    } else {
      if (ruleIsSnoozed) {
        this.logger.debug(`no scheduling of actions for rule ${ruleLabel}: rule is snoozed.`);
      }
      if (!this.shouldLogAndScheduleActionsForAlerts()) {
        this.logger.debug(
          `no scheduling of actions for rule ${ruleLabel}: rule execution has been cancelled.`
        );
        // Usage counter for telemetry
        // This tracks how many times action executions were skipped because rule
        // execution completed successfully, but only after exceeding the execution timeout.
        // This can occur when rule executors do not short circuit execution in
        // response to the timeout.
        this.countUsageOfActionExecutionAfterRuleCancellation();
      }
    }

    return {
      metrics: searchMetrics,
      alertExecutionStore,
      alertTypeState: updatedRuleTypeState || undefined,
      alertInstances: mapValues<
        Record<string, CreatedAlert<InstanceState, InstanceContext>>,
        RawAlertInstance
      >(alertsWithScheduledActions, (alert) => alert.toRaw()),
    };
  }

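  // Validates the rule params against the rule type's validation schema, builds
  // the action execution handler, and runs the alert execution loop.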
  private async validateAndExecuteRule(
    fakeRequest: KibanaRequest,
    apiKey: RawRule['apiKey'],
    rule: SanitizedAlert<Params>,
    event: Event
  ) {
    const {
      params: { alertId: ruleId, spaceId },
    } = this.taskInstance;

    // Validate
    const validatedParams = validateRuleTypeParams(rule.params, this.ruleType.validate?.params);
    const executionHandler = this.getExecutionHandler(
      ruleId,
      rule.name,
      rule.tags,
      spaceId,
      apiKey,
      this.context.kibanaBaseUrl,
      rule.actions,
      rule.params,
      fakeRequest
    );
    return this.executeAlerts(fakeRequest, rule, validatedParams, executionHandler, spaceId, event);
  }

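  // Loads and decrypts the rule, verifying along the way that it is still
  // enabled, readable with its API key, and licensed; each failure mode maps to
  // a distinct execution status error reason. The schedule is re-fetched after
  // execution in case it changed while the task was running.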
  private async loadRuleAttributesAndRun(
    event: Event
  ): Promise<Resultable<RuleExecutionRunResult, Error>> {
    const {
      params: { alertId: ruleId, spaceId },
    } = this.taskInstance;
    let enabled: boolean;
    let apiKey: string | null;
    let consumer: string;
    try {
      const decryptedAttributes = await this.getDecryptedAttributes(ruleId, spaceId);
      apiKey = decryptedAttributes.apiKey;
      enabled = decryptedAttributes.enabled;
      consumer = decryptedAttributes.consumer;
    } catch (err) {
      throw new ErrorWithReason(AlertExecutionStatusErrorReasons.Decrypt, err);
    }

    this.ruleConsumer = consumer;

    if (!enabled) {
      throw new ErrorWithReason(
        AlertExecutionStatusErrorReasons.Disabled,
        new Error(`Rule failed to execute because rule ran after it was disabled.`)
      );
    }

    const fakeRequest = this.getFakeKibanaRequest(spaceId, apiKey);

    // Get rules client with space level permissions
    const rulesClient = this.context.getRulesClientWithRequest(fakeRequest);

    let rule: SanitizedAlert<Params>;

    // Ensure API key is still valid and user has access
    try {
      rule = await rulesClient.get({ id: ruleId });

      if (apm.currentTransaction) {
        apm.currentTransaction.name = `Execute Alerting Rule: "${rule.name}"`;
        apm.currentTransaction.addLabels({
          alerting_rule_consumer: rule.consumer,
          alerting_rule_name: rule.name,
          alerting_rule_tags: rule.tags.join(', '),
          alerting_rule_type_id: rule.alertTypeId,
          alerting_rule_params: JSON.stringify(rule.params),
        });
      }
    } catch (err) {
      throw new ErrorWithReason(AlertExecutionStatusErrorReasons.Read, err);
    }

    this.ruleName = rule.name;

    try {
      this.ruleTypeRegistry.ensureRuleTypeEnabled(rule.alertTypeId);
    } catch (err) {
      throw new ErrorWithReason(AlertExecutionStatusErrorReasons.License, err);
    }

    if (rule.monitoring) {
      if (rule.monitoring.execution.history.length >= MONITORING_HISTORY_LIMIT) {
        // Remove the first (oldest) record
        rule.monitoring.execution.history.shift();
      }
    }
    return {
      monitoring: asOk(rule.monitoring),
      state: await promiseResult<RuleExecutionState, Error>(
        this.validateAndExecuteRule(fakeRequest, apiKey, rule, event)
      ),
      schedule: asOk(
        // fetch the rule again to ensure we return the correct schedule as it may have
        // changed during the task execution
        (await rulesClient.get({ id: ruleId })).schedule
      ),
    };
  }

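  // Entry point invoked by task manager on each scheduled run. Wraps the
  // execution with `execute-start` / `execute` event log records, derives the
  // rule's execution status and monitoring metrics from the outcome, persists
  // them to the rule saved object, and returns the task state and the schedule
  // for the next run (falling back to a retry interval on error).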
  async run(): Promise<RuleTaskRunResult> {
    const {
      params: { alertId: ruleId, spaceId, consumer },
      startedAt,
      state: originalState,
      schedule: taskSchedule,
    } = this.taskInstance;

    // Initially use consumer as stored inside the task instance.
    // Replace this with consumer as read from the rule saved object after
    // we successfully read the rule SO. This allows us to populate a consumer
    // value for `execute-start` events (which are written before the rule SO is read)
    // and in the event of decryption errors (where we cannot read the rule SO).
    // Because "consumer" is set when a rule is created, this value should be static
    // for the life of a rule, but there may be edge cases where migrations cause
    // the consumer values to become out of sync.
    if (consumer) {
      this.ruleConsumer = consumer;
    }

    if (apm.currentTransaction) {
      apm.currentTransaction.name = `Execute Alerting Rule`;
      apm.currentTransaction.addLabels({
        alerting_rule_id: ruleId,
      });
    }

    const runDate = new Date();
    const runDateString = runDate.toISOString();
    this.logger.debug(`executing rule ${this.ruleType.id}:${ruleId} at ${runDateString}`);

    const namespace = this.context.spaceIdToNamespace(spaceId);
    const eventLogger = this.context.eventLogger;
    const scheduleDelay = runDate.getTime() - this.taskInstance.scheduledAt.getTime();

    const event = createAlertEventLogRecordObject({
      ruleId,
      ruleType: this.ruleType as UntypedNormalizedRuleType,
      consumer: this.ruleConsumer!,
      action: EVENT_LOG_ACTIONS.execute,
      namespace,
      spaceId,
      executionId: this.executionId,
      task: {
        scheduled: this.taskInstance.scheduledAt.toISOString(),
        scheduleDelay: Millis2Nanos * scheduleDelay,
      },
      savedObjects: [
        {
          id: ruleId,
          type: 'alert',
          typeId: this.ruleType.id,
          relation: SAVED_OBJECT_REL_PRIMARY,
        },
      ],
    });

    eventLogger.startTiming(event);

    const startEvent = cloneDeep({
      ...event,
      event: {
        ...event.event,
        action: EVENT_LOG_ACTIONS.executeStart,
      },
      message: `rule execution start: "${ruleId}"`,
    });

    eventLogger.logEvent(startEvent);

    const { state, schedule, monitoring } = await errorAsRuleTaskRunResult(
      this.loadRuleAttributesAndRun(event)
    );

    const ruleMonitoring =
      resolveErr<RuleMonitoring | undefined, Error>(monitoring, () => {
        return getDefaultRuleMonitoring();
      }) ?? getDefaultRuleMonitoring();

    const executionStatus = map<RuleExecutionState, ElasticsearchError, AlertExecutionStatus>(
      state,
      (ruleExecutionState) => executionStatusFromState(ruleExecutionState),
      (err: ElasticsearchError) => executionStatusFromError(err)
    );
    // set the executionStatus date to the same as the event's, if it's set
    if (event.event?.start) {
      executionStatus.lastExecutionDate = new Date(event.event.start);
    }

    if (apm.currentTransaction) {
      if (executionStatus.status === 'ok' || executionStatus.status === 'active') {
        apm.currentTransaction.setOutcome('success');
      } else if (executionStatus.status === 'error' || executionStatus.status === 'unknown') {
        apm.currentTransaction.setOutcome('failure');
      }
    }

    this.logger.debug(
      `ruleExecutionStatus for ${this.ruleType.id}:${ruleId}: ${JSON.stringify(executionStatus)}`
    );

    eventLogger.stopTiming(event);
    set(event, 'kibana.alerting.status', executionStatus.status);

    if (this.ruleConsumer) {
      set(event, 'kibana.alert.rule.consumer', this.ruleConsumer);
    }

    const monitoringHistory: RuleMonitoringHistory = {
      success: true,
      timestamp: +new Date(),
    };

    // Copy duration into execution status if available
    if (null != event.event?.duration) {
      executionStatus.lastDuration = Math.round(event.event?.duration / Millis2Nanos);
      monitoringHistory.duration = executionStatus.lastDuration;
    }

    // if executionStatus indicates an error, fill in fields in
    // event from it
    if (executionStatus.error) {
      set(event, 'event.reason', executionStatus.error?.reason || 'unknown');
      set(event, 'event.outcome', 'failure');
      set(event, 'error.message', event?.error?.message || executionStatus.error.message);
      if (!event.message) {
        event.message = `${this.ruleType.id}:${ruleId}: execution failed`;
      }
      monitoringHistory.success = false;
    } else {
      if (executionStatus.warning) {
        set(event, 'event.reason', executionStatus.warning?.reason || 'unknown');
        set(event, 'message', event?.message || executionStatus.warning.message);
      }
      set(
        event,
        'kibana.alert.rule.execution.metrics.number_of_triggered_actions',
        executionStatus.numberOfTriggeredActions
      );
      set(
        event,
        'kibana.alert.rule.execution.metrics.number_of_scheduled_actions',
        executionStatus.numberOfScheduledActions
      );
    }

    // Copy search stats into event log
    if (executionStatus.metrics) {
      set(
        event,
        'kibana.alert.rule.execution.metrics.number_of_searches',
        executionStatus.metrics.numSearches ?? 0
      );
      set(
        event,
        'kibana.alert.rule.execution.metrics.es_search_duration_ms',
        executionStatus.metrics.esSearchDurationMs ?? 0
      );
      set(
        event,
        'kibana.alert.rule.execution.metrics.total_search_duration_ms',
        executionStatus.metrics.totalSearchDurationMs ?? 0
      );
    }

    ruleMonitoring.execution.history.push(monitoringHistory);
    ruleMonitoring.execution.calculated_metrics = {
      success_ratio: getExecutionSuccessRatio(ruleMonitoring),
      ...getExecutionDurationPercentiles(ruleMonitoring),
    };

    eventLogger.logEvent(event);

    if (!this.cancelled) {
      this.inMemoryMetrics.increment(IN_MEMORY_METRICS.RULE_EXECUTIONS);
      if (executionStatus.error) {
        this.inMemoryMetrics.increment(IN_MEMORY_METRICS.RULE_FAILURES);
      }
      this.logger.debug(
        `Updating rule task for ${this.ruleType.id} rule with id ${ruleId} - ${JSON.stringify(
          executionStatus
        )}`
      );
      await this.updateRuleSavedObject(ruleId, namespace, {
        executionStatus: ruleExecutionStatusToRaw(executionStatus),
        monitoring: ruleMonitoring,
      });
    }

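    // Only a subset of the execution state is carried over as task state;
    // per-execution data (metrics, action counts) is dropped and the current
    // start time becomes `previousStartedAt` for the next run.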
    const transformExecutionStateToTaskState = (
      executionState: RuleExecutionState
    ): RuleTaskState => {
      return {
        ...omit(executionState, ['alertExecutionStore', 'metrics']),
        previousStartedAt: startedAt,
      };
    };

    return {
      state: map<RuleExecutionState, ElasticsearchError, RuleTaskState>(
        state,
        (executionState: RuleExecutionState) => transformExecutionStateToTaskState(executionState),
        (err: ElasticsearchError) => {
          const message = `Executing Rule ${spaceId}:${
            this.ruleType.id
          }:${ruleId} has resulted in Error: ${getEsErrorMessage(err)}`;
          if (isAlertSavedObjectNotFoundError(err, ruleId)) {
            this.logger.debug(message);
          } else {
            this.logger.error(message);
          }
          return originalState;
        }
      ),
      schedule: resolveErr<IntervalSchedule | undefined, Error>(schedule, (error) => {
        if (isAlertSavedObjectNotFoundError(error, ruleId)) {
          const spaceMessage = spaceId ? `in the "${spaceId}" space ` : '';
          this.logger.warn(
            `Unable to execute rule "${ruleId}" ${spaceMessage}because ${error.message} - this rule will not be rescheduled. To restart rule execution, try disabling and re-enabling this rule.`
          );
          throwUnrecoverableError(error);
        }

        let retryInterval = taskSchedule?.interval ?? FALLBACK_RETRY_INTERVAL;

        // Set retry interval smaller for ES connectivity errors
        if (isEsUnavailableError(error, ruleId)) {
          retryInterval =
            parseDuration(retryInterval) > parseDuration(CONNECTIVITY_RETRY_INTERVAL)
              ? CONNECTIVITY_RETRY_INTERVAL
              : retryInterval;
        }

        return { interval: retryInterval };
      }),
      monitoring: ruleMonitoring,
    };
  }

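  // Invoked by task manager when execution exceeds the rule type's task timeout:
  // aborts in-flight ES searches, writes an `execute-timeout` event log record,
  // and marks the rule's execution status as a timeout error.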
  async cancel(): Promise<void> {
    if (this.cancelled) {
      return;
    }

    this.cancelled = true;

    // Write event log entry
    const {
      params: { alertId: ruleId, spaceId, consumer },
    } = this.taskInstance;
    const namespace = this.context.spaceIdToNamespace(spaceId);

    if (consumer && !this.ruleConsumer) {
      this.ruleConsumer = consumer;
    }

    this.logger.debug(
      `Cancelling rule type ${this.ruleType.id} with id ${ruleId} - execution exceeded rule type timeout of ${this.ruleType.ruleTaskTimeout}`
    );

    this.logger.debug(
      `Aborting any in-progress ES searches for rule type ${this.ruleType.id} with id ${ruleId}`
    );
    this.searchAbortController.abort();

    const eventLogger = this.context.eventLogger;
    const event: IEvent = {
      event: {
        action: EVENT_LOG_ACTIONS.executeTimeout,
        kind: 'alert',
        category: [this.ruleType.producer],
      },
      message: `rule: ${this.ruleType.id}:${ruleId}: '${
        this.ruleName ?? ''
      }' execution cancelled due to timeout - exceeded rule type timeout of ${
        this.ruleType.ruleTaskTimeout
      }`,
      kibana: {
        alert: {
          rule: {
            ...(this.ruleConsumer ? { consumer: this.ruleConsumer } : {}),
            execution: {
              uuid: this.executionId,
            },
            rule_type_id: this.ruleType.id,
          },
        },
        saved_objects: [
          {
            rel: SAVED_OBJECT_REL_PRIMARY,
            type: 'alert',
            id: ruleId,
            type_id: this.ruleType.id,
            namespace,
          },
        ],
        space_ids: [spaceId],
      },
      rule: {
        id: ruleId,
        license: this.ruleType.minimumLicenseRequired,
        category: this.ruleType.id,
        ruleset: this.ruleType.producer,
        ...(this.ruleName ? { name: this.ruleName } : {}),
      },
    };
    eventLogger.logEvent(event);

    this.inMemoryMetrics.increment(IN_MEMORY_METRICS.RULE_TIMEOUTS);

    // Update the rule saved object with execution status
    const executionStatus: AlertExecutionStatus = {
      lastExecutionDate: new Date(),
      status: 'error',
      error: {
        reason: AlertExecutionStatusErrorReasons.Timeout,
        message: `${this.ruleType.id}:${ruleId}: execution cancelled due to timeout - exceeded rule type timeout of ${this.ruleType.ruleTaskTimeout}`,
      },
    };
    this.logger.debug(
      `Updating rule task for ${this.ruleType.id} rule with id ${ruleId} - execution error due to timeout`
    );
    await this.updateRuleSavedObject(ruleId, namespace, {
      executionStatus: ruleExecutionStatusToRaw(executionStatus),
    });
  }
}

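// Stamps timing state onto alerts: a start time for new alerts, a running
// duration (in nanoseconds) for active alerts, and an end time plus final
// duration for recovered alerts.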
function trackAlertDurations<
  InstanceState extends AlertInstanceState,
  InstanceContext extends AlertInstanceContext
>(params: TrackAlertDurationsParams<InstanceState, InstanceContext>) {
  const currentTime = new Date().toISOString();
  const { currentAlerts, originalAlerts, recoveredAlerts } = params;
  const originalAlertIds = Object.keys(originalAlerts);
  const currentAlertIds = Object.keys(currentAlerts);
  const recoveredAlertIds = Object.keys(recoveredAlerts);
  const newAlertIds = without(currentAlertIds, ...originalAlertIds);

  // Inject start time into alert state of new alerts
  for (const id of newAlertIds) {
    const state = currentAlerts[id].getState();
    currentAlerts[id].replaceState({ ...state, start: currentTime });
  }

  // Calculate duration to date for active alerts
  for (const id of currentAlertIds) {
    const state = originalAlertIds.includes(id)
      ? originalAlerts[id].getState()
      : currentAlerts[id].getState();
    const duration = state.start
      ? (new Date(currentTime).valueOf() - new Date(state.start as string).valueOf()) * 1000 * 1000 // nanoseconds
      : undefined;
    currentAlerts[id].replaceState({
      ...state,
      ...(state.start ? { start: state.start } : {}),
      ...(duration !== undefined ? { duration } : {}),
    });
  }

  // Inject end time into alert state of recovered alerts
  for (const id of recoveredAlertIds) {
    const state = recoveredAlerts[id].getState();
    const duration = state.start
      ? (new Date(currentTime).valueOf() - new Date(state.start as string).valueOf()) * 1000 * 1000 // nanoseconds
      : undefined;
    recoveredAlerts[id].replaceState({
      ...state,
      ...(duration ? { duration } : {}),
      ...(state.start ? { end: currentTime } : {}),
    });
  }
}

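// Writes one event log record per alert transition this execution:
// new-instance, active-instance, and recovered-instance events, each carrying
// the alert's action group and timing state.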
function generateNewAndRecoveredAlertEvents<
  InstanceState extends AlertInstanceState,
  InstanceContext extends AlertInstanceContext
>(params: GenerateNewAndRecoveredAlertEventsParams<InstanceState, InstanceContext>) {
  const {
    eventLogger,
    executionId,
    ruleId,
    namespace,
    currentAlerts,
    originalAlerts,
    recoveredAlerts,
    rule,
    ruleType,
    spaceId,
  } = params;
  const originalAlertIds = Object.keys(originalAlerts);
  const currentAlertIds = Object.keys(currentAlerts);
  const recoveredAlertIds = Object.keys(recoveredAlerts);
  const newIds = without(currentAlertIds, ...originalAlertIds);

  if (apm.currentTransaction) {
    apm.currentTransaction.addLabels({
      alerting_new_alerts: newIds.length,
    });
  }

  for (const id of recoveredAlertIds) {
    const { group: actionGroup, subgroup: actionSubgroup } =
      recoveredAlerts[id].getLastScheduledActions() ?? {};
    const state = recoveredAlerts[id].getState();
    const message = `${params.ruleLabel} alert '${id}' has recovered`;
    logAlertEvent(
      id,
      EVENT_LOG_ACTIONS.recoveredInstance,
      message,
      state,
      actionGroup,
      actionSubgroup
    );
  }

  for (const id of newIds) {
    const { actionGroup, subgroup: actionSubgroup } =
      currentAlerts[id].getScheduledActionOptions() ?? {};
    const state = currentAlerts[id].getState();
    const message = `${params.ruleLabel} created new alert: '${id}'`;
    logAlertEvent(id, EVENT_LOG_ACTIONS.newInstance, message, state, actionGroup, actionSubgroup);
  }

  for (const id of currentAlertIds) {
    const { actionGroup, subgroup: actionSubgroup } =
      currentAlerts[id].getScheduledActionOptions() ?? {};
    const state = currentAlerts[id].getState();
    const message = `${params.ruleLabel} active alert: '${id}' in ${
      actionSubgroup
        ? `actionGroup(subgroup): '${actionGroup}(${actionSubgroup})'`
        : `actionGroup: '${actionGroup}'`
    }`;
    logAlertEvent(
      id,
      EVENT_LOG_ACTIONS.activeInstance,
      message,
      state,
      actionGroup,
      actionSubgroup
    );
  }

  function logAlertEvent(
    alertId: string,
    action: string,
    message: string,
    state: InstanceState,
    group?: string,
    subgroup?: string
  ) {
    const event: IEvent = {
      event: {
        action,
        kind: 'alert',
        category: [ruleType.producer],
        ...(state?.start ? { start: state.start as string } : {}),
        ...(state?.end ? { end: state.end as string } : {}),
        ...(state?.duration !== undefined ? { duration: state.duration as number } : {}),
      },
      kibana: {
        alert: {
          rule: {
            consumer: rule.consumer,
            execution: {
              uuid: executionId,
            },
            rule_type_id: ruleType.id,
          },
        },
        alerting: {
          instance_id: alertId,
          ...(group ? { action_group_id: group } : {}),
          ...(subgroup ? { action_subgroup: subgroup } : {}),
        },
        saved_objects: [
          {
            rel: SAVED_OBJECT_REL_PRIMARY,
            type: 'alert',
            id: ruleId,
            type_id: ruleType.id,
            namespace,
          },
        ],
        space_ids: [spaceId],
      },
      message,
      rule: {
        id: rule.id,
        license: ruleType.minimumLicenseRequired,
        category: ruleType.id,
        ruleset: ruleType.producer,
        name: rule.name,
      },
    };
    eventLogger.logEvent(event);
  }
}

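// Schedules the recovery action group for each recovered alert, skipping muted
// alerts; scheduling happens sequentially through the shared execution handler.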
async function scheduleActionsForRecoveredAlerts<
  InstanceState extends AlertInstanceState,
  InstanceContext extends AlertInstanceContext,
  RecoveryActionGroupId extends string
>(
  params: ScheduleActionsForRecoveredAlertsParams<
    InstanceState,
    InstanceContext,
    RecoveryActionGroupId
  >
): Promise<void> {
  const {
    logger,
    recoveryActionGroup,
    recoveredAlerts,
    executionHandler,
    mutedAlertIdsSet,
    ruleLabel,
    alertExecutionStore,
  } = params;
  const recoveredIds = Object.keys(recoveredAlerts);

  for (const id of recoveredIds) {
    if (mutedAlertIdsSet.has(id)) {
      logger.debug(
        `skipping scheduling of actions for '${id}' in rule ${ruleLabel}: instance is muted`
      );
    } else {
      const alert = recoveredAlerts[id];
      alert.updateLastScheduledActions(recoveryActionGroup.id);
      alert.unscheduleActions();
      await executionHandler({
        actionGroup: recoveryActionGroup.id,
        context: alert.getContext(),
        state: {},
        alertId: id,
        alertExecutionStore,
      });
      alert.scheduleActions(recoveryActionGroup.id);
    }
  }
}

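// Debug-logs the active and recovered alerts for this execution, and flags
// recovered alerts that never had recovery context set when the rule type
// supports setting it.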
function logActiveAndRecoveredAlerts<
  InstanceState extends AlertInstanceState,
  InstanceContext extends AlertInstanceContext,
  ActionGroupIds extends string,
  RecoveryActionGroupId extends string
>(
  params: LogActiveAndRecoveredAlertsParams<
    InstanceState,
    InstanceContext,
    ActionGroupIds,
    RecoveryActionGroupId
  >
) {
  const { logger, activeAlerts, recoveredAlerts, ruleLabel, canSetRecoveryContext } = params;
  const activeAlertIds = Object.keys(activeAlerts);
  const recoveredAlertIds = Object.keys(recoveredAlerts);

  if (apm.currentTransaction) {
    apm.currentTransaction.addLabels({
      alerting_active_alerts: activeAlertIds.length,
      alerting_recovered_alerts: recoveredAlertIds.length,
    });
  }

  if (activeAlertIds.length > 0) {
    logger.debug(
      `rule ${ruleLabel} has ${activeAlertIds.length} active alerts: ${JSON.stringify(
        activeAlertIds.map((alertId) => ({
          instanceId: alertId,
          actionGroup: activeAlerts[alertId].getScheduledActionOptions()?.actionGroup,
        }))
      )}`
    );
  }
  if (recoveredAlertIds.length > 0) {
    logger.debug(
      `rule ${ruleLabel} has ${recoveredAlertIds.length} recovered alerts: ${JSON.stringify(
        recoveredAlertIds
      )}`
    );

    if (canSetRecoveryContext) {
      for (const id of recoveredAlertIds) {
        if (!recoveredAlerts[id].hasContext()) {
          logger.debug(
            `rule ${ruleLabel} has no recovery context specified for recovered alert ${id}`
          );
        }
      }
    }
  }
}

/**
 * If an error is thrown, wrap it in a RuleExecutionRunResult
 * so that we can treat each field independently
 */
async function errorAsRuleTaskRunResult(
  future: Promise<Resultable<RuleExecutionRunResult, Error>>
): Promise<Resultable<RuleExecutionRunResult, Error>> {
  try {
    return await future;
  } catch (e) {
    return {
      state: asErr(e),
      schedule: asErr(e),
      monitoring: asErr(e),
    };
  }
}