[ResponseOps][alerting] add rule info to logging in alertsClient (#190857)

## Summary

While investigating some issues with the alertsClient, I realized that
we weren't writing out any rule information for the logged messages.
This made debugging quite difficult, as I wanted to see the rule, so had
to search through the alerts indices for the specified alert to get it's
rule id, rule type, etc.

As an example, see https://github.com/elastic/kibana/issues/190376

This PR adds that kind of rule info to the logged messages in
alertsClient, as well as the typical sort of tags we write out (rule id,
rule type, module).
This commit is contained in:
Patrick Mueller 2024-08-22 10:29:22 -04:00 committed by GitHub
parent e782811877
commit 07717a43ab
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 172 additions and 42 deletions

View file

@ -287,6 +287,9 @@ const defaultExecutionOpts = {
startedAt: null,
};
const ruleInfo = `for test.rule-type:1 'rule-name'`;
const logTags = { tags: ['test.rule-type', '1', 'alerts-client'] };
describe('Alerts Client', () => {
let alertsClientParams: AlertsClientParams;
let processAndLogAlertsOpts: ProcessAndLogAlertsOpts;
@ -484,7 +487,8 @@ describe('Alerts Client', () => {
});
expect(logger.error).toHaveBeenCalledWith(
`Error searching for tracked alerts by UUID - search failed!`
`Error searching for tracked alerts by UUID ${ruleInfo} - search failed!`,
logTags
);
spy.mockRestore();
@ -778,7 +782,8 @@ describe('Alerts Client', () => {
expect(spy).toHaveBeenNthCalledWith(2, 'recoveredCurrent');
expect(logger.error).toHaveBeenCalledWith(
"Error writing alert(2) to .alerts-test.alerts-default - alert(2) doesn't exist in active alerts"
`Error writing alert(2) to .alerts-test.alerts-default - alert(2) doesn't exist in active alerts ${ruleInfo}.`,
logTags
);
spy.mockRestore();
@ -1346,7 +1351,8 @@ describe('Alerts Client', () => {
expect(clusterClient.bulk).toHaveBeenCalled();
expect(logger.error).toHaveBeenCalledWith(
`Error writing alerts: 1 successful, 0 conflicts, 2 errors: Validation Failed: 1: index is missing;2: type is missing;; failed to parse field [process.command_line] of type [wildcard] in document with id 'f0c9805be95fedbc3c99c663f7f02cc15826c122'.`
`Error writing alerts ${ruleInfo}: 1 successful, 0 conflicts, 2 errors: Validation Failed: 1: index is missing;2: type is missing;; failed to parse field [process.command_line] of type [wildcard] in document with id 'f0c9805be95fedbc3c99c663f7f02cc15826c122'.`,
{ tags: ['test.rule-type', '1', 'resolve-alert-conflicts'] }
);
});
@ -1423,7 +1429,8 @@ describe('Alerts Client', () => {
});
expect(logger.warn).toHaveBeenCalledWith(
`Could not update alert abc in partial-.internal.alerts-test.alerts-default-000001. Partial and restored alert indices are not supported.`
`Could not update alert abc in partial-.internal.alerts-test.alerts-default-000001. Partial and restored alert indices are not supported ${ruleInfo}.`,
logTags
);
});
@ -1448,7 +1455,8 @@ describe('Alerts Client', () => {
expect(clusterClient.bulk).toHaveBeenCalled();
expect(logger.error).toHaveBeenCalledWith(
`Error writing 2 alerts to .alerts-test.alerts-default - fail`
`Error writing 2 alerts to .alerts-test.alerts-default ${ruleInfo} - fail`,
logTags
);
});
@ -1478,7 +1486,8 @@ describe('Alerts Client', () => {
});
expect(logger.debug).toHaveBeenCalledWith(
`Resources registered and installed for test context but "shouldWrite" is set to false.`
`Resources registered and installed for test context but "shouldWrite" is set to false ${ruleInfo}.`,
logTags
);
expect(clusterClient.bulk).not.toHaveBeenCalled();
});
@ -2026,7 +2035,8 @@ describe('Alerts Client', () => {
).rejects.toBe('something went wrong!');
expect(logger.warn).toHaveBeenCalledWith(
'Error updating alert maintenance window IDs: something went wrong!'
`Error updating alert maintenance window IDs for test.rule-type:1 'rule-name': something went wrong!`,
logTags
);
});
});

View file

@ -112,6 +112,8 @@ export class AlertsClient<
private reportedAlerts: Record<string, DeepPartial<AlertData>> = {};
private _isUsingDataStreams: boolean;
private ruleInfoMessage: string;
private logTags: { tags: string[] };
constructor(private readonly options: AlertsClientParams) {
this.legacyAlertsClient = new LegacyAlertsClient<
@ -130,6 +132,8 @@ export class AlertsClient<
this.rule = formatRule({ rule: this.options.rule, ruleType: this.options.ruleType });
this.ruleType = options.ruleType;
this._isUsingDataStreams = this.options.dataStreamAdapter.isUsingDataStreams();
this.ruleInfoMessage = `for ${this.ruleType.id}:${this.options.rule.id} '${this.options.rule.name}'`;
this.logTags = { tags: [this.ruleType.id, this.options.rule.id, 'alerts-client'] };
}
public async initializeExecution(opts: InitializeExecutionOpts) {
@ -202,7 +206,10 @@ export class AlertsClient<
this.fetchedAlerts.primaryTerm[alertUuid] = hit._primary_term;
}
} catch (err) {
this.options.logger.error(`Error searching for tracked alerts by UUID - ${err.message}`);
this.options.logger.error(
`Error searching for tracked alerts by UUID ${this.ruleInfoMessage} - ${err.message}`,
this.logTags
);
}
}
@ -327,7 +334,8 @@ export class AlertsClient<
);
} catch (e) {
this.options.logger.debug(
`Failed to update alert matched by maintenance window scoped query for rule ${this.ruleType.id}:${this.options.rule.id}: '${this.options.rule.name}'.`
`Failed to update alert matched by maintenance window scoped query ${this.ruleInfoMessage}`,
this.logTags
);
}
@ -407,7 +415,8 @@ export class AlertsClient<
private async persistAlertsHelper() {
if (!this.ruleType.alerts?.shouldWrite) {
this.options.logger.debug(
`Resources registered and installed for ${this.ruleType.alerts?.context} context but "shouldWrite" is set to false.`
`Resources registered and installed for ${this.ruleType.alerts?.context} context but "shouldWrite" is set to false ${this.ruleInfoMessage}.`,
this.logTags
);
return;
}
@ -482,7 +491,8 @@ export class AlertsClient<
}
} else {
this.options.logger.error(
`Error writing alert(${id}) to ${this.indexTemplateAndPattern.alias} - alert(${id}) doesn't exist in active alerts`
`Error writing alert(${id}) to ${this.indexTemplateAndPattern.alias} - alert(${id}) doesn't exist in active alerts ${this.ruleInfoMessage}.`,
this.logTags
);
}
}
@ -529,7 +539,8 @@ export class AlertsClient<
return true;
} else if (!isValidAlertIndexName(alertIndex)) {
this.options.logger.warn(
`Could not update alert ${alertUuid} in ${alertIndex}. Partial and restored alert indices are not supported.`
`Could not update alert ${alertUuid} in ${alertIndex}. Partial and restored alert indices are not supported ${this.ruleInfoMessage}.`,
this.logTags
);
return false;
}
@ -573,11 +584,15 @@ export class AlertsClient<
operations: bulkBody,
},
bulkResponse: response,
ruleId: this.options.rule.id,
ruleName: this.options.rule.name,
ruleType: this.ruleType.id,
});
}
} catch (err) {
this.options.logger.error(
`Error writing ${alertsToIndex.length} alerts to ${this.indexTemplateAndPattern.alias} - ${err.message}`
`Error writing ${alertsToIndex.length} alerts to ${this.indexTemplateAndPattern.alias} ${this.ruleInfoMessage} - ${err.message}`,
this.logTags
);
}
}
@ -669,7 +684,10 @@ export class AlertsClient<
});
return response;
} catch (err) {
this.options.logger.warn(`Error updating alert maintenance window IDs: ${err}`);
this.options.logger.warn(
`Error updating alert maintenance window IDs ${this.ruleInfoMessage}: ${err}`,
this.logTags
);
throw err;
}
}
@ -739,7 +757,8 @@ export class AlertsClient<
// Update alerts with new maintenance window IDs, await not needed
this.updateAlertMaintenanceWindowIds(uniqueAlertsId).catch(() => {
this.options.logger.debug(
'Failed to update new alerts with scoped query maintenance window Ids by updateByQuery.'
`Failed to update new alerts with scoped query maintenance window Ids by updateByQuery ${this.ruleInfoMessage}.`,
this.logTags
);
});
}

View file

@ -25,6 +25,12 @@ import { resolveAlertConflicts } from './alert_conflict_resolver';
const logger = loggingSystemMock.create().get();
const esClient = elasticsearchServiceMock.createElasticsearchClient();
const ruleId = 'rule-id';
const ruleName = 'name of rule';
const ruleType = 'rule-type';
const ruleInfo = `for ${ruleType}:${ruleId} '${ruleName}'`;
const logTags = { tags: [ruleType, ruleId, 'resolve-alert-conflicts'] };
const alertDoc = {
[EVENT_ACTION]: 'active',
@ -45,11 +51,20 @@ describe('alert_conflict_resolver', () => {
esClient.mget.mockRejectedValueOnce(new Error('mget failed'));
await resolveAlertConflicts({ logger, esClient, bulkRequest, bulkResponse });
await resolveAlertConflicts({
logger,
esClient,
bulkRequest,
bulkResponse,
ruleId,
ruleName,
ruleType,
});
expect(logger.error).toHaveBeenNthCalledWith(
2,
'Error resolving alert conflicts: mget failed'
`Error resolving alert conflicts ${ruleInfo}: mget failed`,
logTags
);
});
@ -61,11 +76,20 @@ describe('alert_conflict_resolver', () => {
});
esClient.bulk.mockRejectedValueOnce(new Error('bulk failed'));
await resolveAlertConflicts({ logger, esClient, bulkRequest, bulkResponse });
await resolveAlertConflicts({
logger,
esClient,
bulkRequest,
bulkResponse,
ruleId,
ruleName,
ruleType,
});
expect(logger.error).toHaveBeenNthCalledWith(
2,
'Error resolving alert conflicts: bulk failed'
`Error resolving alert conflicts ${ruleInfo}: bulk failed`,
logTags
);
});
});
@ -73,13 +97,29 @@ describe('alert_conflict_resolver', () => {
describe('is successful with', () => {
test('no bulk results', async () => {
const { bulkRequest, bulkResponse } = getReqRes('');
await resolveAlertConflicts({ logger, esClient, bulkRequest, bulkResponse });
await resolveAlertConflicts({
logger,
esClient,
bulkRequest,
bulkResponse,
ruleId,
ruleName,
ruleType,
});
expect(logger.error).not.toHaveBeenCalled();
});
test('no errors in bulk results', async () => {
const { bulkRequest, bulkResponse } = getReqRes('c is is c is');
await resolveAlertConflicts({ logger, esClient, bulkRequest, bulkResponse });
await resolveAlertConflicts({
logger,
esClient,
bulkRequest,
bulkResponse,
ruleId,
ruleName,
ruleType,
});
expect(logger.error).not.toHaveBeenCalled();
});
@ -96,16 +136,30 @@ describe('alert_conflict_resolver', () => {
items: [getBulkResItem(0)],
});
await resolveAlertConflicts({ logger, esClient, bulkRequest, bulkResponse });
await resolveAlertConflicts({
logger,
esClient,
bulkRequest,
bulkResponse,
ruleId,
ruleName,
ruleType,
});
expect(logger.error).toHaveBeenNthCalledWith(
1,
`Error writing alerts: 0 successful, 1 conflicts, 0 errors: `
`Error writing alerts ${ruleInfo}: 0 successful, 1 conflicts, 0 errors: `,
logTags
);
expect(logger.info).toHaveBeenNthCalledWith(
1,
`Retrying bulk update of 1 conflicted alerts ${ruleInfo}`,
logTags
);
expect(logger.info).toHaveBeenNthCalledWith(1, `Retrying bulk update of 1 conflicted alerts`);
expect(logger.info).toHaveBeenNthCalledWith(
2,
`Retried bulk update of 1 conflicted alerts succeeded`
`Retried bulk update of 1 conflicted alerts succeeded ${ruleInfo}`,
logTags
);
});
@ -122,16 +176,30 @@ describe('alert_conflict_resolver', () => {
items: [getBulkResItem(2)],
});
await resolveAlertConflicts({ logger, esClient, bulkRequest, bulkResponse });
await resolveAlertConflicts({
logger,
esClient,
bulkRequest,
bulkResponse,
ruleId,
ruleName,
ruleType,
});
expect(logger.error).toHaveBeenNthCalledWith(
1,
`Error writing alerts: 2 successful, 1 conflicts, 1 errors: hallo`
`Error writing alerts ${ruleInfo}: 2 successful, 1 conflicts, 1 errors: hallo`,
logTags
);
expect(logger.info).toHaveBeenNthCalledWith(
1,
`Retrying bulk update of 1 conflicted alerts ${ruleInfo}`,
logTags
);
expect(logger.info).toHaveBeenNthCalledWith(1, `Retrying bulk update of 1 conflicted alerts`);
expect(logger.info).toHaveBeenNthCalledWith(
2,
`Retried bulk update of 1 conflicted alerts succeeded`
`Retried bulk update of 1 conflicted alerts succeeded ${ruleInfo}`,
logTags
);
});
@ -148,16 +216,30 @@ describe('alert_conflict_resolver', () => {
items: [getBulkResItem(2), getBulkResItem(3), getBulkResItem(5)],
});
await resolveAlertConflicts({ logger, esClient, bulkRequest, bulkResponse });
await resolveAlertConflicts({
logger,
esClient,
bulkRequest,
bulkResponse,
ruleId,
ruleName,
ruleType,
});
expect(logger.error).toHaveBeenNthCalledWith(
1,
`Error writing alerts: 2 successful, 3 conflicts, 1 errors: hallo`
`Error writing alerts ${ruleInfo}: 2 successful, 3 conflicts, 1 errors: hallo`,
logTags
);
expect(logger.info).toHaveBeenNthCalledWith(
1,
`Retrying bulk update of 3 conflicted alerts ${ruleInfo}`,
logTags
);
expect(logger.info).toHaveBeenNthCalledWith(1, `Retrying bulk update of 3 conflicted alerts`);
expect(logger.info).toHaveBeenNthCalledWith(
2,
`Retried bulk update of 3 conflicted alerts succeeded`
`Retried bulk update of 3 conflicted alerts succeeded ${ruleInfo}`,
logTags
);
});
});

View file

@ -35,6 +35,9 @@ export interface ResolveAlertConflictsParams {
logger: Logger;
bulkRequest: BulkRequest<unknown, unknown>;
bulkResponse: BulkResponse;
ruleId: string;
ruleName: string;
ruleType: string;
}
interface NormalizedBulkRequest {
@ -46,26 +49,33 @@ interface NormalizedBulkRequest {
// to replace just logging that the error occurred, so we don't want
// to cause _more_ errors ...
export async function resolveAlertConflicts(params: ResolveAlertConflictsParams): Promise<void> {
const { logger } = params;
const { logger, ruleId, ruleType, ruleName } = params;
const ruleInfoMessage = `for ${ruleType}:${ruleId} '${ruleName}'`;
const logTags = { tags: [ruleType, ruleId, 'resolve-alert-conflicts'] };
try {
await resolveAlertConflicts_(params);
} catch (err) {
logger.error(`Error resolving alert conflicts: ${err.message}`);
logger.error(`Error resolving alert conflicts ${ruleInfoMessage}: ${err.message}`, logTags);
}
}
async function resolveAlertConflicts_(params: ResolveAlertConflictsParams): Promise<void> {
const { logger, esClient, bulkRequest, bulkResponse } = params;
const { logger, esClient, bulkRequest, bulkResponse, ruleId, ruleType, ruleName } = params;
if (bulkRequest.operations && bulkRequest.operations?.length === 0) return;
if (bulkResponse.items && bulkResponse.items?.length === 0) return;
const ruleInfoMessage = `for ${ruleType}:${ruleId} '${ruleName}'`;
const logTags = { tags: [ruleType, ruleId, 'resolve-alert-conflicts'] };
// get numbers for a summary log message
const { success, errors, conflicts, messages } = getResponseStats(bulkResponse);
if (conflicts === 0 && errors === 0) return;
const allMessages = messages.join('; ');
logger.error(
`Error writing alerts: ${success} successful, ${conflicts} conflicts, ${errors} errors: ${allMessages}`
`Error writing alerts ${ruleInfoMessage}: ${success} successful, ${conflicts} conflicts, ${errors} errors: ${allMessages}`,
logTags
);
// get a new bulk request for just conflicted docs
@ -79,14 +89,18 @@ async function resolveAlertConflicts_(params: ResolveAlertConflictsParams): Prom
await updateOCC(conflictRequest, freshDocs);
await refreshFieldsInDocs(conflictRequest, freshDocs);
logger.info(`Retrying bulk update of ${conflictRequest.length} conflicted alerts`);
logger.info(
`Retrying bulk update of ${conflictRequest.length} conflicted alerts ${ruleInfoMessage}`,
logTags
);
const mbrResponse = await makeBulkRequest(params.esClient, params.bulkRequest, conflictRequest);
if (mbrResponse.bulkResponse?.items.length !== conflictRequest.length) {
const actual = mbrResponse.bulkResponse?.items.length;
const expected = conflictRequest.length;
logger.error(
`Unexpected number of bulk response items retried; expecting ${expected}, retried ${actual}`
`Unexpected number of bulk response items retried; expecting ${expected}, retried ${actual} ${ruleInfoMessage}`,
logTags
);
return;
}
@ -94,16 +108,21 @@ async function resolveAlertConflicts_(params: ResolveAlertConflictsParams): Prom
if (mbrResponse.error) {
const index = bulkRequest.index || 'unknown index';
logger.error(
`Error writing ${conflictRequest.length} alerts to ${index} - ${mbrResponse.error.message}`
`Error writing ${conflictRequest.length} alerts to ${index} ${ruleInfoMessage} - ${mbrResponse.error.message}`,
logTags
);
return;
}
if (mbrResponse.errors === 0) {
logger.info(`Retried bulk update of ${conflictRequest.length} conflicted alerts succeeded`);
logger.info(
`Retried bulk update of ${conflictRequest.length} conflicted alerts succeeded ${ruleInfoMessage}`,
logTags
);
} else {
logger.error(
`Retried bulk update of ${conflictRequest.length} conflicted alerts still had ${mbrResponse.errors} conflicts`
`Retried bulk update of ${conflictRequest.length} conflicted alerts still had ${mbrResponse.errors} conflicts ${ruleInfoMessage}`,
logTags
);
}
}