Mirror of https://github.com/elastic/kibana.git (synced 2025-04-23 09:19:04 -04:00)
[ResponseOps][alerting] add rule info to logging in alertsClient (#190857)
## Summary

While investigating some issues with the alertsClient, I realized that we weren't writing out any rule information in the logged messages. This made debugging quite difficult: to identify the rule, I had to search through the alerts indices for the specified alert to get its rule id, rule type, etc. As an example, see https://github.com/elastic/kibana/issues/190376.

This PR adds that rule info to the logged messages in alertsClient, along with the typical tags we write out (rule id, rule type, module).
parent: e782811877
commit: 07717a43ab
4 changed files with 172 additions and 42 deletions
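
The change boils down to a simple pattern: build the rule-info suffix and the structured tags once, then pass both to every logger call. Below is a minimal, self-contained TypeScript sketch of that pattern; the `Logger` shape and the `buildRuleLogContext` helper are illustrative stand-ins, not Kibana APIs. The PR itself inlines the equivalent `ruleInfoMessage` and `logTags` values in the `AlertsClient` constructor and in `resolveAlertConflicts`.

```typescript
// Illustrative sketch of the logging pattern this PR applies.
// Logger and buildRuleLogContext are hypothetical stand-ins for the real Kibana types.
interface Logger {
  error(message: string, meta?: { tags: string[] }): void;
}

interface RuleLogContext {
  ruleInfoMessage: string;
  logTags: { tags: string[] };
}

// Hypothetical helper: the PR builds these values inline rather than via a helper.
function buildRuleLogContext(
  ruleTypeId: string,
  ruleId: string,
  ruleName: string,
  module: string
): RuleLogContext {
  return {
    ruleInfoMessage: `for ${ruleTypeId}:${ruleId} '${ruleName}'`,
    logTags: { tags: [ruleTypeId, ruleId, module] },
  };
}

// Usage: every log line now carries the rule context and searchable tags.
const logger: Logger = {
  error: (message, meta) => console.error(message, meta),
};

const { ruleInfoMessage, logTags } = buildRuleLogContext(
  'test.rule-type',
  '1',
  'rule-name',
  'alerts-client'
);
const err = new Error('search failed!');
logger.error(
  `Error searching for tracked alerts by UUID ${ruleInfoMessage} - ${err.message}`,
  logTags
);
// -> Error searching for tracked alerts by UUID for test.rule-type:1 'rule-name' - search failed!
//    { tags: [ 'test.rule-type', '1', 'alerts-client' ] }
```

Passing the tags as structured metadata rather than embedding them in the message keeps the log text readable while still making entries filterable by rule id and rule type.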
```diff
@@ -287,6 +287,9 @@ const defaultExecutionOpts = {
   startedAt: null,
 };

+const ruleInfo = `for test.rule-type:1 'rule-name'`;
+const logTags = { tags: ['test.rule-type', '1', 'alerts-client'] };
+
 describe('Alerts Client', () => {
   let alertsClientParams: AlertsClientParams;
   let processAndLogAlertsOpts: ProcessAndLogAlertsOpts;
@@ -484,7 +487,8 @@ describe('Alerts Client', () => {
       });

       expect(logger.error).toHaveBeenCalledWith(
-        `Error searching for tracked alerts by UUID - search failed!`
+        `Error searching for tracked alerts by UUID ${ruleInfo} - search failed!`,
+        logTags
       );

       spy.mockRestore();
@@ -778,7 +782,8 @@ describe('Alerts Client', () => {
       expect(spy).toHaveBeenNthCalledWith(2, 'recoveredCurrent');

       expect(logger.error).toHaveBeenCalledWith(
-        "Error writing alert(2) to .alerts-test.alerts-default - alert(2) doesn't exist in active alerts"
+        `Error writing alert(2) to .alerts-test.alerts-default - alert(2) doesn't exist in active alerts ${ruleInfo}.`,
+        logTags
       );
       spy.mockRestore();

@@ -1346,7 +1351,8 @@ describe('Alerts Client', () => {

       expect(clusterClient.bulk).toHaveBeenCalled();
       expect(logger.error).toHaveBeenCalledWith(
-        `Error writing alerts: 1 successful, 0 conflicts, 2 errors: Validation Failed: 1: index is missing;2: type is missing;; failed to parse field [process.command_line] of type [wildcard] in document with id 'f0c9805be95fedbc3c99c663f7f02cc15826c122'.`
+        `Error writing alerts ${ruleInfo}: 1 successful, 0 conflicts, 2 errors: Validation Failed: 1: index is missing;2: type is missing;; failed to parse field [process.command_line] of type [wildcard] in document with id 'f0c9805be95fedbc3c99c663f7f02cc15826c122'.`,
+        { tags: ['test.rule-type', '1', 'resolve-alert-conflicts'] }
       );
     });

@@ -1423,7 +1429,8 @@ describe('Alerts Client', () => {
       });

       expect(logger.warn).toHaveBeenCalledWith(
-        `Could not update alert abc in partial-.internal.alerts-test.alerts-default-000001. Partial and restored alert indices are not supported.`
+        `Could not update alert abc in partial-.internal.alerts-test.alerts-default-000001. Partial and restored alert indices are not supported ${ruleInfo}.`,
+        logTags
       );
     });

@@ -1448,7 +1455,8 @@ describe('Alerts Client', () => {

       expect(clusterClient.bulk).toHaveBeenCalled();
       expect(logger.error).toHaveBeenCalledWith(
-        `Error writing 2 alerts to .alerts-test.alerts-default - fail`
+        `Error writing 2 alerts to .alerts-test.alerts-default ${ruleInfo} - fail`,
+        logTags
       );
     });

@@ -1478,7 +1486,8 @@ describe('Alerts Client', () => {
       });

       expect(logger.debug).toHaveBeenCalledWith(
-        `Resources registered and installed for test context but "shouldWrite" is set to false.`
+        `Resources registered and installed for test context but "shouldWrite" is set to false ${ruleInfo}.`,
+        logTags
       );
       expect(clusterClient.bulk).not.toHaveBeenCalled();
     });
@@ -2026,7 +2035,8 @@ describe('Alerts Client', () => {
       ).rejects.toBe('something went wrong!');

       expect(logger.warn).toHaveBeenCalledWith(
-        'Error updating alert maintenance window IDs: something went wrong!'
+        `Error updating alert maintenance window IDs for test.rule-type:1 'rule-name': something went wrong!`,
+        logTags
       );
     });
   });
```
```diff
@@ -112,6 +112,8 @@ export class AlertsClient<

   private reportedAlerts: Record<string, DeepPartial<AlertData>> = {};
   private _isUsingDataStreams: boolean;
+  private ruleInfoMessage: string;
+  private logTags: { tags: string[] };

   constructor(private readonly options: AlertsClientParams) {
     this.legacyAlertsClient = new LegacyAlertsClient<
@@ -130,6 +132,8 @@ export class AlertsClient<
     this.rule = formatRule({ rule: this.options.rule, ruleType: this.options.ruleType });
     this.ruleType = options.ruleType;
     this._isUsingDataStreams = this.options.dataStreamAdapter.isUsingDataStreams();
+    this.ruleInfoMessage = `for ${this.ruleType.id}:${this.options.rule.id} '${this.options.rule.name}'`;
+    this.logTags = { tags: [this.ruleType.id, this.options.rule.id, 'alerts-client'] };
   }

   public async initializeExecution(opts: InitializeExecutionOpts) {
@@ -202,7 +206,10 @@ export class AlertsClient<
         this.fetchedAlerts.primaryTerm[alertUuid] = hit._primary_term;
       }
     } catch (err) {
-      this.options.logger.error(`Error searching for tracked alerts by UUID - ${err.message}`);
+      this.options.logger.error(
+        `Error searching for tracked alerts by UUID ${this.ruleInfoMessage} - ${err.message}`,
+        this.logTags
+      );
     }
   }

@@ -327,7 +334,8 @@ export class AlertsClient<
       );
     } catch (e) {
       this.options.logger.debug(
-        `Failed to update alert matched by maintenance window scoped query for rule ${this.ruleType.id}:${this.options.rule.id}: '${this.options.rule.name}'.`
+        `Failed to update alert matched by maintenance window scoped query ${this.ruleInfoMessage}`,
+        this.logTags
       );
     }

@@ -407,7 +415,8 @@ export class AlertsClient<
   private async persistAlertsHelper() {
     if (!this.ruleType.alerts?.shouldWrite) {
       this.options.logger.debug(
-        `Resources registered and installed for ${this.ruleType.alerts?.context} context but "shouldWrite" is set to false.`
+        `Resources registered and installed for ${this.ruleType.alerts?.context} context but "shouldWrite" is set to false ${this.ruleInfoMessage}.`,
+        this.logTags
       );
       return;
     }
@@ -482,7 +491,8 @@ export class AlertsClient<
           }
         } else {
           this.options.logger.error(
-            `Error writing alert(${id}) to ${this.indexTemplateAndPattern.alias} - alert(${id}) doesn't exist in active alerts`
+            `Error writing alert(${id}) to ${this.indexTemplateAndPattern.alias} - alert(${id}) doesn't exist in active alerts ${this.ruleInfoMessage}.`,
+            this.logTags
           );
         }
       }
@@ -529,7 +539,8 @@ export class AlertsClient<
       return true;
     } else if (!isValidAlertIndexName(alertIndex)) {
       this.options.logger.warn(
-        `Could not update alert ${alertUuid} in ${alertIndex}. Partial and restored alert indices are not supported.`
+        `Could not update alert ${alertUuid} in ${alertIndex}. Partial and restored alert indices are not supported ${this.ruleInfoMessage}.`,
+        this.logTags
       );
       return false;
     }
@@ -573,11 +584,15 @@ export class AlertsClient<
             operations: bulkBody,
           },
           bulkResponse: response,
+          ruleId: this.options.rule.id,
+          ruleName: this.options.rule.name,
+          ruleType: this.ruleType.id,
         });
       }
     } catch (err) {
       this.options.logger.error(
-        `Error writing ${alertsToIndex.length} alerts to ${this.indexTemplateAndPattern.alias} - ${err.message}`
+        `Error writing ${alertsToIndex.length} alerts to ${this.indexTemplateAndPattern.alias} ${this.ruleInfoMessage} - ${err.message}`,
+        this.logTags
       );
     }
   }
@@ -669,7 +684,10 @@ export class AlertsClient<
       });
       return response;
     } catch (err) {
-      this.options.logger.warn(`Error updating alert maintenance window IDs: ${err}`);
+      this.options.logger.warn(
+        `Error updating alert maintenance window IDs ${this.ruleInfoMessage}: ${err}`,
+        this.logTags
+      );
       throw err;
     }
   }
@@ -739,7 +757,8 @@ export class AlertsClient<
     // Update alerts with new maintenance window IDs, await not needed
     this.updateAlertMaintenanceWindowIds(uniqueAlertsId).catch(() => {
       this.options.logger.debug(
-        'Failed to update new alerts with scoped query maintenance window Ids by updateByQuery.'
+        `Failed to update new alerts with scoped query maintenance window Ids by updateByQuery ${this.ruleInfoMessage}.`,
+        this.logTags
       );
     });
   }
```
```diff
@@ -25,6 +25,12 @@ import { resolveAlertConflicts } from './alert_conflict_resolver';

 const logger = loggingSystemMock.create().get();
 const esClient = elasticsearchServiceMock.createElasticsearchClient();
+const ruleId = 'rule-id';
+const ruleName = 'name of rule';
+const ruleType = 'rule-type';
+
+const ruleInfo = `for ${ruleType}:${ruleId} '${ruleName}'`;
+const logTags = { tags: [ruleType, ruleId, 'resolve-alert-conflicts'] };

 const alertDoc = {
   [EVENT_ACTION]: 'active',
@@ -45,11 +51,20 @@ describe('alert_conflict_resolver', () => {

       esClient.mget.mockRejectedValueOnce(new Error('mget failed'));

-      await resolveAlertConflicts({ logger, esClient, bulkRequest, bulkResponse });
+      await resolveAlertConflicts({
+        logger,
+        esClient,
+        bulkRequest,
+        bulkResponse,
+        ruleId,
+        ruleName,
+        ruleType,
+      });

       expect(logger.error).toHaveBeenNthCalledWith(
         2,
-        'Error resolving alert conflicts: mget failed'
+        `Error resolving alert conflicts ${ruleInfo}: mget failed`,
+        logTags
       );
     });

@@ -61,11 +76,20 @@ describe('alert_conflict_resolver', () => {
       });
       esClient.bulk.mockRejectedValueOnce(new Error('bulk failed'));

-      await resolveAlertConflicts({ logger, esClient, bulkRequest, bulkResponse });
+      await resolveAlertConflicts({
+        logger,
+        esClient,
+        bulkRequest,
+        bulkResponse,
+        ruleId,
+        ruleName,
+        ruleType,
+      });

       expect(logger.error).toHaveBeenNthCalledWith(
         2,
-        'Error resolving alert conflicts: bulk failed'
+        `Error resolving alert conflicts ${ruleInfo}: bulk failed`,
+        logTags
       );
     });
   });
@@ -73,13 +97,29 @@ describe('alert_conflict_resolver', () => {
   describe('is successful with', () => {
     test('no bulk results', async () => {
       const { bulkRequest, bulkResponse } = getReqRes('');
-      await resolveAlertConflicts({ logger, esClient, bulkRequest, bulkResponse });
+      await resolveAlertConflicts({
+        logger,
+        esClient,
+        bulkRequest,
+        bulkResponse,
+        ruleId,
+        ruleName,
+        ruleType,
+      });
       expect(logger.error).not.toHaveBeenCalled();
     });

     test('no errors in bulk results', async () => {
       const { bulkRequest, bulkResponse } = getReqRes('c is is c is');
-      await resolveAlertConflicts({ logger, esClient, bulkRequest, bulkResponse });
+      await resolveAlertConflicts({
+        logger,
+        esClient,
+        bulkRequest,
+        bulkResponse,
+        ruleId,
+        ruleName,
+        ruleType,
+      });
       expect(logger.error).not.toHaveBeenCalled();
     });

@@ -96,16 +136,30 @@ describe('alert_conflict_resolver', () => {
         items: [getBulkResItem(0)],
       });

-      await resolveAlertConflicts({ logger, esClient, bulkRequest, bulkResponse });
+      await resolveAlertConflicts({
+        logger,
+        esClient,
+        bulkRequest,
+        bulkResponse,
+        ruleId,
+        ruleName,
+        ruleType,
+      });

       expect(logger.error).toHaveBeenNthCalledWith(
         1,
-        `Error writing alerts: 0 successful, 1 conflicts, 0 errors: `
+        `Error writing alerts ${ruleInfo}: 0 successful, 1 conflicts, 0 errors: `,
+        logTags
       );
+      expect(logger.info).toHaveBeenNthCalledWith(
+        1,
+        `Retrying bulk update of 1 conflicted alerts ${ruleInfo}`,
+        logTags
+      );
-      expect(logger.info).toHaveBeenNthCalledWith(1, `Retrying bulk update of 1 conflicted alerts`);
       expect(logger.info).toHaveBeenNthCalledWith(
         2,
-        `Retried bulk update of 1 conflicted alerts succeeded`
+        `Retried bulk update of 1 conflicted alerts succeeded ${ruleInfo}`,
+        logTags
       );
     });

@@ -122,16 +176,30 @@ describe('alert_conflict_resolver', () => {
         items: [getBulkResItem(2)],
       });

-      await resolveAlertConflicts({ logger, esClient, bulkRequest, bulkResponse });
+      await resolveAlertConflicts({
+        logger,
+        esClient,
+        bulkRequest,
+        bulkResponse,
+        ruleId,
+        ruleName,
+        ruleType,
+      });

       expect(logger.error).toHaveBeenNthCalledWith(
         1,
-        `Error writing alerts: 2 successful, 1 conflicts, 1 errors: hallo`
+        `Error writing alerts ${ruleInfo}: 2 successful, 1 conflicts, 1 errors: hallo`,
+        logTags
       );
+      expect(logger.info).toHaveBeenNthCalledWith(
+        1,
+        `Retrying bulk update of 1 conflicted alerts ${ruleInfo}`,
+        logTags
+      );
-      expect(logger.info).toHaveBeenNthCalledWith(1, `Retrying bulk update of 1 conflicted alerts`);
       expect(logger.info).toHaveBeenNthCalledWith(
         2,
-        `Retried bulk update of 1 conflicted alerts succeeded`
+        `Retried bulk update of 1 conflicted alerts succeeded ${ruleInfo}`,
+        logTags
       );
     });

@@ -148,16 +216,30 @@ describe('alert_conflict_resolver', () => {
         items: [getBulkResItem(2), getBulkResItem(3), getBulkResItem(5)],
       });

-      await resolveAlertConflicts({ logger, esClient, bulkRequest, bulkResponse });
+      await resolveAlertConflicts({
+        logger,
+        esClient,
+        bulkRequest,
+        bulkResponse,
+        ruleId,
+        ruleName,
+        ruleType,
+      });

       expect(logger.error).toHaveBeenNthCalledWith(
         1,
-        `Error writing alerts: 2 successful, 3 conflicts, 1 errors: hallo`
+        `Error writing alerts ${ruleInfo}: 2 successful, 3 conflicts, 1 errors: hallo`,
+        logTags
       );
+      expect(logger.info).toHaveBeenNthCalledWith(
+        1,
+        `Retrying bulk update of 3 conflicted alerts ${ruleInfo}`,
+        logTags
+      );
-      expect(logger.info).toHaveBeenNthCalledWith(1, `Retrying bulk update of 3 conflicted alerts`);
       expect(logger.info).toHaveBeenNthCalledWith(
         2,
-        `Retried bulk update of 3 conflicted alerts succeeded`
+        `Retried bulk update of 3 conflicted alerts succeeded ${ruleInfo}`,
+        logTags
       );
     });
   });
```
```diff
@@ -35,6 +35,9 @@ export interface ResolveAlertConflictsParams {
   logger: Logger;
   bulkRequest: BulkRequest<unknown, unknown>;
   bulkResponse: BulkResponse;
+  ruleId: string;
+  ruleName: string;
+  ruleType: string;
 }

 interface NormalizedBulkRequest {
@@ -46,26 +49,33 @@ interface NormalizedBulkRequest {
 // to replace just logging that the error occurred, so we don't want
 // to cause _more_ errors ...
 export async function resolveAlertConflicts(params: ResolveAlertConflictsParams): Promise<void> {
-  const { logger } = params;
+  const { logger, ruleId, ruleType, ruleName } = params;
+  const ruleInfoMessage = `for ${ruleType}:${ruleId} '${ruleName}'`;
+  const logTags = { tags: [ruleType, ruleId, 'resolve-alert-conflicts'] };
+
   try {
     await resolveAlertConflicts_(params);
   } catch (err) {
-    logger.error(`Error resolving alert conflicts: ${err.message}`);
+    logger.error(`Error resolving alert conflicts ${ruleInfoMessage}: ${err.message}`, logTags);
   }
 }

 async function resolveAlertConflicts_(params: ResolveAlertConflictsParams): Promise<void> {
-  const { logger, esClient, bulkRequest, bulkResponse } = params;
+  const { logger, esClient, bulkRequest, bulkResponse, ruleId, ruleType, ruleName } = params;
   if (bulkRequest.operations && bulkRequest.operations?.length === 0) return;
   if (bulkResponse.items && bulkResponse.items?.length === 0) return;

+  const ruleInfoMessage = `for ${ruleType}:${ruleId} '${ruleName}'`;
+  const logTags = { tags: [ruleType, ruleId, 'resolve-alert-conflicts'] };
+
   // get numbers for a summary log message
   const { success, errors, conflicts, messages } = getResponseStats(bulkResponse);
   if (conflicts === 0 && errors === 0) return;

   const allMessages = messages.join('; ');
   logger.error(
-    `Error writing alerts: ${success} successful, ${conflicts} conflicts, ${errors} errors: ${allMessages}`
+    `Error writing alerts ${ruleInfoMessage}: ${success} successful, ${conflicts} conflicts, ${errors} errors: ${allMessages}`,
+    logTags
   );

   // get a new bulk request for just conflicted docs
@@ -79,14 +89,18 @@ async function resolveAlertConflicts_(params: ResolveAlertConflictsParams): Promise<void> {
   await updateOCC(conflictRequest, freshDocs);
   await refreshFieldsInDocs(conflictRequest, freshDocs);

-  logger.info(`Retrying bulk update of ${conflictRequest.length} conflicted alerts`);
+  logger.info(
+    `Retrying bulk update of ${conflictRequest.length} conflicted alerts ${ruleInfoMessage}`,
+    logTags
+  );
   const mbrResponse = await makeBulkRequest(params.esClient, params.bulkRequest, conflictRequest);

   if (mbrResponse.bulkResponse?.items.length !== conflictRequest.length) {
     const actual = mbrResponse.bulkResponse?.items.length;
     const expected = conflictRequest.length;
     logger.error(
-      `Unexpected number of bulk response items retried; expecting ${expected}, retried ${actual}`
+      `Unexpected number of bulk response items retried; expecting ${expected}, retried ${actual} ${ruleInfoMessage}`,
+      logTags
     );
     return;
   }
@@ -94,16 +108,21 @@ async function resolveAlertConflicts_(params: ResolveAlertConflictsParams): Promise<void> {
   if (mbrResponse.error) {
     const index = bulkRequest.index || 'unknown index';
     logger.error(
-      `Error writing ${conflictRequest.length} alerts to ${index} - ${mbrResponse.error.message}`
+      `Error writing ${conflictRequest.length} alerts to ${index} ${ruleInfoMessage} - ${mbrResponse.error.message}`,
+      logTags
     );
     return;
   }

   if (mbrResponse.errors === 0) {
-    logger.info(`Retried bulk update of ${conflictRequest.length} conflicted alerts succeeded`);
+    logger.info(
+      `Retried bulk update of ${conflictRequest.length} conflicted alerts succeeded ${ruleInfoMessage}`,
+      logTags
+    );
   } else {
     logger.error(
-      `Retried bulk update of ${conflictRequest.length} conflicted alerts still had ${mbrResponse.errors} conflicts`
+      `Retried bulk update of ${conflictRequest.length} conflicted alerts still had ${mbrResponse.errors} conflicts ${ruleInfoMessage}`,
+      logTags
     );
   }
 }
```