[ML] Transforms: Improve messages for recovered alerts (#205721)

This commit is contained in:
Dima Arnautov 2025-01-09 17:53:09 +01:00 committed by GitHub
parent a56227bf37
commit 737cf96809
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 236 additions and 117 deletions

View file

@ -22,6 +22,9 @@ export function getResultTestConfig(config: TransformHealthRuleTestsConfig) {
notStarted: {
enabled: config?.notStarted?.enabled ?? true,
},
/**
* @deprecated Replaced by healthCheck in 8.8.
*/
errorMessages: {
enabled: config?.errorMessages?.enabled ?? false,
},

View file

@ -26,6 +26,7 @@ import {
type TransformHealthStatus,
TRANSFORM_RULE_TYPE,
TRANSFORM_HEALTH_RESULTS,
TRANSFORM_HEALTH_CHECK_NAMES,
} from '../../../../common/constants';
import type { TransformHealthRuleParams } from './schema';
import { transformHealthRuleParams } from './schema';
@ -74,6 +75,11 @@ interface RegisterParams {
getFieldFormatsStart: () => FieldFormatsStart;
}
/**
 * Rule state handed from one execution of the transform health rule to the next:
 * the executor returns it as `state` and receives it back as the previous state.
 * Both fields are optional and are only written for checks that came back unhealthy.
 */
export interface TransformHealthAlertState extends RuleTypeState {
// Transform IDs taken from the results of the "not started" check.
notStarted?: string[];
// Transform IDs meant to come from the results of the health check (unhealthy transforms).
unhealthy?: string[];
}
export const TRANSFORM_HEALTH_AAD_INDEX_NAME = 'transform.health';
export const TRANSFORM_HEALTH_AAD_CONFIG: IRuleTypeAlerts<TransformHealthAlert> = {
@ -109,7 +115,7 @@ export function getTransformHealthRuleType(
): RuleType<
TransformHealthRuleParams,
never,
RuleTypeState,
TransformHealthAlertState,
AlertInstanceState,
TransformHealthAlertContext,
TransformIssue,
@ -162,6 +168,7 @@ export function getTransformHealthRuleType(
const {
services: { scopedClusterClient, alertsClient, uiSettingsClient },
params,
state: previousState,
} = options;
if (!alertsClient) {
@ -177,12 +184,24 @@ export function getTransformHealthRuleType(
fieldFormatsRegistry,
});
const executionResult = await transformHealthService.getHealthChecksResults(params);
const executionResult = await transformHealthService.getHealthChecksResults(
params,
previousState
);
const unhealthyTests = executionResult.filter(({ isHealthy }) => !isHealthy);
const state: TransformHealthAlertState = {};
if (unhealthyTests.length > 0) {
unhealthyTests.forEach(({ name: alertInstanceName, context }) => {
// Remember which transforms failed each check so that the next execution can
// name the ones that have recovered. Each case must end with `break`: without
// it the `notStarted` case falls through and overwrites `state.unhealthy`
// with the IDs of transforms that are merely not started.
switch (alertInstanceName) {
  case TRANSFORM_HEALTH_CHECK_NAMES.notStarted.name:
    state.notStarted = context.results.map((r) => r.transform_id);
    break;
  case TRANSFORM_HEALTH_CHECK_NAMES.healthCheck.name:
    state.unhealthy = context.results.map((r) => r.transform_id);
    break;
}
alertsClient.report({
id: alertInstanceName,
actionGroup: TRANSFORM_ISSUE,
@ -198,7 +217,9 @@ export function getTransformHealthRuleType(
// Set context for recovered alerts
for (const recoveredAlert of alertsClient.getRecoveredAlerts()) {
const recoveredAlertId = recoveredAlert.alert.getId();
const testResult = executionResult.find((v) => v.name === recoveredAlertId);
if (testResult) {
alertsClient.setAlertData({
id: recoveredAlertId,
@ -211,7 +232,7 @@ export function getTransformHealthRuleType(
}
}
return { state: {} };
return { state };
},
};
}

View file

@ -28,43 +28,23 @@ describe('transformHealthServiceProvider', () => {
(esClient.transform.getTransform as jest.Mock).mockImplementation(
async ({ transform_id: transformId }) => {
if (transformId === 'transform4,transform6,transform6*') {
// arrangement for exclude transforms
return {
transforms: [
{
id: `transform4`,
sync: true,
},
{
id: `transform6`,
sync: true,
},
...new Array(10).fill(null).map((_, i) => ({
id: `transform6${i}`,
sync: true,
})),
],
} as unknown as TransformGetTransformResponse;
} else {
return {
transforms: [
// Mock continuous transforms
...new Array(102).fill(null).map((_, i) => ({
id: `transform${i}`,
sync: {
time: {
field: 'order_date',
delay: '60s',
},
return {
transforms: [
// Mock continuous transforms
...new Array(102).fill(null).map((_, i) => ({
id: `transform${i}`,
sync: {
time: {
field: 'order_date',
delay: '60s',
},
})),
{
id: 'transform102',
},
],
} as unknown as TransformGetTransformResponse;
}
})),
{
id: 'transform102',
},
],
} as unknown as TransformGetTransformResponse;
}
);
@ -85,29 +65,28 @@ describe('transformHealthServiceProvider', () => {
it('should fetch transform stats by transform IDs if the length does not exceed the URL limit', async () => {
const service = transformHealthServiceProvider({ esClient, rulesClient, fieldFormatsRegistry });
const result = await service.getHealthChecksResults({
includeTransforms: ['*'],
excludeTransforms: ['transform4', 'transform6', 'transform6*'],
testsConfig: null,
});
const result = await service.getHealthChecksResults(
{
includeTransforms: ['*'],
excludeTransforms: ['transform4', 'transform6', 'transform6*'],
testsConfig: null,
},
{}
);
expect(esClient.transform.getTransform).toHaveBeenCalledTimes(2);
expect(esClient.transform.getTransform).toHaveBeenCalledTimes(1);
expect(esClient.transform.getTransform).toHaveBeenCalledWith({
allow_no_match: true,
size: 1000,
});
expect(esClient.transform.getTransform).toHaveBeenCalledWith({
transform_id: 'transform4,transform6,transform6*',
allow_no_match: true,
size: 1000,
});
expect(esClient.transform.getTransformStats).toHaveBeenCalledTimes(1);
expect(esClient.transform.getTransformStats).toHaveBeenNthCalledWith(1, {
basic: true,
transform_id:
'transform0,transform1,transform2,transform3,transform5,transform7,transform8,transform9,transform10,transform11,transform12,transform13,transform14,transform15,transform16,transform17,transform18,transform19,transform20,transform21,transform22,transform23,transform24,transform25,transform26,transform27,transform28,transform29,transform30,transform31,transform32,transform33,transform34,transform35,transform36,transform37,transform38,transform39,transform40,transform41,transform42,transform43,transform44,transform45,transform46,transform47,transform48,transform49,transform50,transform51,transform52,transform53,transform54,transform55,transform56,transform57,transform58,transform59,transform70,transform71,transform72,transform73,transform74,transform75,transform76,transform77,transform78,transform79,transform80,transform81,transform82,transform83,transform84,transform85,transform86,transform87,transform88,transform89,transform90,transform91,transform92,transform93,transform94,transform95,transform96,transform97,transform98,transform99,transform100,transform101',
size: 90,
});
expect(result).toBeDefined();
@ -142,11 +121,14 @@ describe('transformHealthServiceProvider', () => {
} as unknown as TransformGetTransformStatsResponse);
const service = transformHealthServiceProvider({ esClient, rulesClient, fieldFormatsRegistry });
const result = await service.getHealthChecksResults({
includeTransforms: ['*'],
excludeTransforms: new Array(50).fill(null).map((_, i) => `${transformIdPrefix}${i + 60}`),
testsConfig: null,
});
const result = await service.getHealthChecksResults(
{
includeTransforms: ['*'],
excludeTransforms: new Array(50).fill(null).map((_, i) => `${transformIdPrefix}${i + 60}`),
testsConfig: null,
},
{}
);
expect(esClient.transform.getTransform).toHaveBeenCalledWith({
allow_no_match: true,
@ -156,15 +138,97 @@ describe('transformHealthServiceProvider', () => {
expect(esClient.transform.getTransformStats).toHaveBeenNthCalledWith(1, {
basic: true,
transform_id: '_all',
size: 1000,
});
const notStarted = result[0];
expect(notStarted.context.message).toEqual(
'Transform transform_with_a_very_long_id_that_result_in_long_url_for_sure_0, transform_with_a_very_long_id_that_result_in_long_url_for_sure_1, transform_with_a_very_long_id_that_result_in_long_url_for_sure_2, transform_with_a_very_long_id_that_result_in_long_url_for_sure_3, transform_with_a_very_long_id_that_result_in_long_url_for_sure_4, transform_with_a_very_long_id_that_result_in_long_url_for_sure_5, transform_with_a_very_long_id_that_result_in_long_url_for_sure_6, transform_with_a_very_long_id_that_result_in_long_url_for_sure_7, transform_with_a_very_long_id_that_result_in_long_url_for_sure_8, transform_with_a_very_long_id_that_result_in_long_url_for_sure_9, transform_with_a_very_long_id_that_result_in_long_url_for_sure_10, transform_with_a_very_long_id_that_result_in_long_url_for_sure_11, transform_with_a_very_long_id_that_result_in_long_url_for_sure_12, transform_with_a_very_long_id_that_result_in_long_url_for_sure_13, transform_with_a_very_long_id_that_result_in_long_url_for_sure_14, transform_with_a_very_long_id_that_result_in_long_url_for_sure_15, transform_with_a_very_long_id_that_result_in_long_url_for_sure_16, transform_with_a_very_long_id_that_result_in_long_url_for_sure_17, transform_with_a_very_long_id_that_result_in_long_url_for_sure_18, transform_with_a_very_long_id_that_result_in_long_url_for_sure_19, transform_with_a_very_long_id_that_result_in_long_url_for_sure_20, transform_with_a_very_long_id_that_result_in_long_url_for_sure_21, transform_with_a_very_long_id_that_result_in_long_url_for_sure_22, transform_with_a_very_long_id_that_result_in_long_url_for_sure_23, transform_with_a_very_long_id_that_result_in_long_url_for_sure_24, transform_with_a_very_long_id_that_result_in_long_url_for_sure_25, transform_with_a_very_long_id_that_result_in_long_url_for_sure_26, transform_with_a_very_long_id_that_result_in_long_url_for_sure_27, transform_with_a_very_long_id_that_result_in_long_url_for_sure_28, 
transform_with_a_very_long_id_that_result_in_long_url_for_sure_29, transform_with_a_very_long_id_that_result_in_long_url_for_sure_30, transform_with_a_very_long_id_that_result_in_long_url_for_sure_31, transform_with_a_very_long_id_that_result_in_long_url_for_sure_32, transform_with_a_very_long_id_that_result_in_long_url_for_sure_33, transform_with_a_very_long_id_that_result_in_long_url_for_sure_34, transform_with_a_very_long_id_that_result_in_long_url_for_sure_35, transform_with_a_very_long_id_that_result_in_long_url_for_sure_36, transform_with_a_very_long_id_that_result_in_long_url_for_sure_37, transform_with_a_very_long_id_that_result_in_long_url_for_sure_38, transform_with_a_very_long_id_that_result_in_long_url_for_sure_39, transform_with_a_very_long_id_that_result_in_long_url_for_sure_40, transform_with_a_very_long_id_that_result_in_long_url_for_sure_41, transform_with_a_very_long_id_that_result_in_long_url_for_sure_42, transform_with_a_very_long_id_that_result_in_long_url_for_sure_43, transform_with_a_very_long_id_that_result_in_long_url_for_sure_44, transform_with_a_very_long_id_that_result_in_long_url_for_sure_45, transform_with_a_very_long_id_that_result_in_long_url_for_sure_46, transform_with_a_very_long_id_that_result_in_long_url_for_sure_47, transform_with_a_very_long_id_that_result_in_long_url_for_sure_48, transform_with_a_very_long_id_that_result_in_long_url_for_sure_49, transform_with_a_very_long_id_that_result_in_long_url_for_sure_50, transform_with_a_very_long_id_that_result_in_long_url_for_sure_51, transform_with_a_very_long_id_that_result_in_long_url_for_sure_52, transform_with_a_very_long_id_that_result_in_long_url_for_sure_53, transform_with_a_very_long_id_that_result_in_long_url_for_sure_54, transform_with_a_very_long_id_that_result_in_long_url_for_sure_55, transform_with_a_very_long_id_that_result_in_long_url_for_sure_56, transform_with_a_very_long_id_that_result_in_long_url_for_sure_57, 
transform_with_a_very_long_id_that_result_in_long_url_for_sure_58, transform_with_a_very_long_id_that_result_in_long_url_for_sure_59 are not started.'
'60 transforms are not started: transform_with_a_very_long_id_that_result_in_long_url_for_sure_0, transform_with_a_very_long_id_that_result_in_long_url_for_sure_1, transform_with_a_very_long_id_that_result_in_long_url_for_sure_2, transform_with_a_very_long_id_that_result_in_long_url_for_sure_3, transform_with_a_very_long_id_that_result_in_long_url_for_sure_4, transform_with_a_very_long_id_that_result_in_long_url_for_sure_5, transform_with_a_very_long_id_that_result_in_long_url_for_sure_6, transform_with_a_very_long_id_that_result_in_long_url_for_sure_7, transform_with_a_very_long_id_that_result_in_long_url_for_sure_8, transform_with_a_very_long_id_that_result_in_long_url_for_sure_9 and 50 others.'
);
});
// The previous state lists transform1 and transform2 as not started, while the
// mocked stats report both as started. The test expects the recovery message of
// the "not started" check (the first result) to name exactly those two transforms.
it('should mention recovered transforms based on the previous state', async () => {
const service = transformHealthServiceProvider({ esClient, rulesClient, fieldFormatsRegistry });
(esClient.transform.getTransformStats as jest.Mock).mockResolvedValue({
count: 2,
transforms: [
{
id: 'transform1',
state: 'started',
},
{
id: 'transform2',
state: 'started',
},
],
} as unknown as TransformGetTransformStatsResponse);
const result = await service.getHealthChecksResults(
{
includeTransforms: ['*'],
excludeTransforms: ['transform4', 'transform6', 'transform6*'],
testsConfig: {
notStarted: {
enabled: true,
},
healthCheck: {
enabled: false,
},
errorMessages: {
enabled: false,
},
},
},
// Previous rule state: IDs recorded by the preceding execution.
{
notStarted: ['transform1', 'transform2'],
unhealthy: ['transform3'],
}
);
expect(result[0].context.message).toEqual('2 transforms are started: transform1, transform2.');
});
// With an empty previous state there are no previously not-started transforms to
// report as recovered, so the test expects the generic "All transforms are started."
// message instead of a list of IDs.
it('should work without previous execution state', async () => {
const service = transformHealthServiceProvider({ esClient, rulesClient, fieldFormatsRegistry });
(esClient.transform.getTransformStats as jest.Mock).mockResolvedValue({
count: 2,
transforms: [
{
id: 'transform1',
state: 'started',
},
{
id: 'transform2',
state: 'started',
},
],
} as unknown as TransformGetTransformStatsResponse);
const result = await service.getHealthChecksResults(
{
includeTransforms: ['*'],
excludeTransforms: ['transform4', 'transform6', 'transform6*'],
testsConfig: {
notStarted: {
enabled: true,
},
healthCheck: {
enabled: false,
},
errorMessages: {
enabled: false,
},
},
},
// Empty previous rule state, e.g. no IDs were recorded earlier.
{}
);
expect(result[0].context.message).toEqual('All transforms are started.');
});
describe('populateTransformsWithAssignedRules', () => {
it('should throw an error if rulesClient is missing', async () => {
const service = transformHealthServiceProvider({ esClient, fieldFormatsRegistry });

View file

@ -27,6 +27,7 @@ import { getResultTestConfig } from '../../../../common/utils/alerts';
import type {
ErrorMessagesTransformResponse,
TransformHealthAlertContext,
TransformHealthAlertState,
TransformStateReportResponse,
} from './register_transform_health_rule_type';
import type { TransformHealthAlertRule } from '../../../../common/types/alerting';
@ -44,6 +45,11 @@ type TransformWithAlertingRules = Transform & { alerting_rules: TransformHealthA
const maxPathComponentLength = 2000;
const TRANSFORM_PAGE_SIZE = 1000;
/** Maximum number of transform IDs listed in the context message before the list is truncated */
const TRANSFORMS_IDS_MESSAGE_LIMIT = 10;
export function transformHealthServiceProvider({
esClient,
rulesClient,
@ -60,47 +66,56 @@ export function transformHealthServiceProvider({
* @param includeTransforms
* @param excludeTransforms
*/
const getResultsTransformIds = async (
const getResultsTransformIds = (
transforms: Transform[],
includeTransforms: string[],
excludeTransforms: string[] | null
): Promise<Set<string>> => {
const includeAll = includeTransforms.some((id) => id === ALL_TRANSFORMS_SELECTION);
): Set<string> => {
const continuousTransforms: Transform[] = transforms.filter(isContinuousTransform);
let resultTransformIds: string[] = [];
// Fetch transforms to make sure assigned transforms exists.
const transformsResponse = (
await esClient.transform.getTransform({
...(includeAll ? {} : { transform_id: includeTransforms.join(',') }),
allow_no_match: true,
size: 1000,
})
).transforms as Transform[];
transformsResponse.forEach((t) => {
continuousTransforms.forEach((t) => {
transformsDict.set(t.id, t);
// Include only continuously running transforms.
if (isContinuousTransform(t)) {
resultTransformIds.push(t.id);
}
});
if (excludeTransforms && excludeTransforms.length > 0) {
let excludeIdsSet = new Set(excludeTransforms);
if (excludeTransforms.some((id) => id.includes('*'))) {
const excludeTransformResponse = (
await esClient.transform.getTransform({
transform_id: excludeTransforms.join(','),
allow_no_match: true,
size: 1000,
})
).transforms as Transform[];
excludeIdsSet = new Set(excludeTransformResponse.map((t) => t.id));
}
resultTransformIds = resultTransformIds.filter((id) => !excludeIdsSet.has(id));
}
return new Set(
continuousTransforms
.filter(
(t) =>
includeTransforms.some((includedTransformId) =>
new RegExp('^' + includedTransformId.replace(/\*/g, '.*') + '$').test(t.id)
) &&
(Array.isArray(excludeTransforms) && excludeTransforms.length > 0
? excludeTransforms.every(
(excludedTransformId) =>
new RegExp('^' + excludedTransformId.replace(/\*/g, '.*') + '$').test(t.id) ===
false
)
: true)
)
.map((t) => t.id)
);
};
return new Set(resultTransformIds);
/**
 * Formats transform IDs for use in an alert context message.
 *
 * Lists of up to TRANSFORMS_IDS_MESSAGE_LIMIT IDs are joined with ", " as is.
 * Longer lists keep only the first TRANSFORMS_IDS_MESSAGE_LIMIT IDs, followed
 * by a translated "and N other(s)" suffix.
 *
 * @param transformIds IDs to render
 * @returns the text to embed in the context message
 */
const getContextMessageTransformIds = (transformIds: string[]): string => {
  // Short list: no truncation needed.
  if (transformIds.length <= TRANSFORMS_IDS_MESSAGE_LIMIT) {
    return transformIds.join(', ');
  }

  const truncatedTransformIds = transformIds.slice(0, TRANSFORMS_IDS_MESSAGE_LIMIT).join(', ');
  const restCount = transformIds.length - TRANSFORMS_IDS_MESSAGE_LIMIT;

  return i18n.translate('xpack.transform.alertTypes.transformHealth.truncatedTransformIdsMessage', {
    defaultMessage:
      '{truncatedTransformIds} and {restCount, plural, one {# other} other {# others}}',
    values: { truncatedTransformIds, restCount },
  });
};
const getTransformStats = memoize(
@ -113,6 +128,7 @@ export function transformHealthServiceProvider({
transform_id: transformIdsString,
// @ts-expect-error `basic` query option not yet in @elastic/elasticsearch
basic: true,
size: transformIds.size,
})
).transforms as TransformStats[];
} else {
@ -123,6 +139,7 @@ export function transformHealthServiceProvider({
// @ts-expect-error `basic` query option not yet in @elastic/elasticsearch
basic: true,
transform_id: '_all',
size: TRANSFORM_PAGE_SIZE,
})
).transforms as TransformStats[]
).filter((t) => transformIds.has(t.id));
@ -269,8 +286,22 @@ export function transformHealthServiceProvider({
* Returns results of the transform health checks
* @param params
*/
async getHealthChecksResults(params: TransformHealthRuleParams) {
const transformIds = await getResultsTransformIds(
async getHealthChecksResults(
params: TransformHealthRuleParams,
previousState: TransformHealthAlertState
) {
const includeAll = params.includeTransforms.some((id) => id === ALL_TRANSFORMS_SELECTION);
const transforms = (
await esClient.transform.getTransform({
...(includeAll ? {} : { transform_id: params.includeTransforms.join(',') }),
allow_no_match: true,
size: TRANSFORM_PAGE_SIZE,
})
).transforms as Transform[];
const transformIds = getResultsTransformIds(
transforms,
params.includeTransforms,
params.excludeTransforms
);
@ -284,30 +315,37 @@ export function transformHealthServiceProvider({
transformIds
);
const prevNotStartedSet: Set<string> = new Set(previousState?.notStarted ?? []);
const recoveredTransforms = startedTransforms.filter((t) =>
prevNotStartedSet.has(t.transform_id)
);
const isHealthy = notStartedTransform.length === 0;
const count = isHealthy ? startedTransforms.length : notStartedTransform.length;
const transformsString = (isHealthy ? startedTransforms : notStartedTransform)
.map((t) => t.transform_id)
.join(', ');
// If healthy, mention only the transforms that the previous state recorded as not started
const count = isHealthy ? recoveredTransforms.length : notStartedTransform.length;
const transformsString = getContextMessageTransformIds(
(isHealthy ? recoveredTransforms : notStartedTransform).map((t) => t.transform_id)
);
result.push({
isHealthy,
name: TRANSFORM_HEALTH_CHECK_NAMES.notStarted.name,
context: {
results: isHealthy ? startedTransforms : notStartedTransform,
results: isHealthy ? recoveredTransforms : notStartedTransform,
message: isHealthy
? i18n.translate(
'xpack.transform.alertTypes.transformHealth.notStartedRecoveryMessage',
{
defaultMessage:
'{count, plural, one {Transform} other {Transform}} {transformsString} {count, plural, one {is} other {are}} started.',
'{count, plural, =0 {All transforms are started} one {Transform {transformsString} is started} other {# transforms are started: {transformsString}}}.',
values: { count, transformsString },
}
)
: i18n.translate('xpack.transform.alertTypes.transformHealth.notStartedMessage', {
defaultMessage:
'{count, plural, one {Transform} other {Transform}} {transformsString} {count, plural, one {is} other {are}} not started.',
'{count, plural, one {Transform {transformsString} is not started} other {# transforms are not started: {transformsString}}}.',
values: { count, transformsString },
}),
},
@ -347,8 +385,12 @@ export function transformHealthServiceProvider({
if (testsConfig.healthCheck.enabled) {
const response = await this.getUnhealthyTransformsReport(transformIds);
const isHealthy = response.length === 0;
const count = response.length;
const transformsString = response.map((t) => t.transform_id).join(', ');
const count: number = isHealthy ? previousState?.unhealthy?.length ?? 0 : response.length;
const transformsString = getContextMessageTransformIds(
isHealthy ? previousState?.unhealthy ?? [] : response.map((t) => t.transform_id)
);
result.push({
isHealthy,
name: TRANSFORM_HEALTH_CHECK_NAMES.healthCheck.name,
@ -359,13 +401,13 @@ export function transformHealthServiceProvider({
'xpack.transform.alertTypes.transformHealth.healthCheckRecoveryMessage',
{
defaultMessage:
'{count, plural, one {Transform} other {Transforms}} {transformsString} {count, plural, one {is} other {are}} healthy.',
'{count, plural, =0 {All transforms are healthy} one {Transform {transformsString} is healthy} other {# transforms are healthy: {transformsString}}}.',
values: { count, transformsString },
}
)
: i18n.translate('xpack.transform.alertTypes.transformHealth.healthCheckMessage', {
defaultMessage:
'{count, plural, one {Transform} other {Transforms}} {transformsString} {count, plural, one {is} other {are}} unhealthy.',
'{count, plural, one {Transform {transformsString} is unhealthy} other {# transforms are unhealthy: {transformsString}}}.',
values: { count, transformsString },
}),
},
@ -406,21 +448,10 @@ export function transformHealthServiceProvider({
// Retrieve result transform IDs
const { includeTransforms, excludeTransforms } = ruleInstance.params;
const resultTransformIds = new Set(
transforms
.filter(
(t) =>
includeTransforms.some((includedTransformId) =>
new RegExp(includedTransformId.replace(/\*/g, '.*')).test(t.id)
) &&
(Array.isArray(excludeTransforms) && excludeTransforms.length > 0
? excludeTransforms.every(
(excludedTransformId) =>
new RegExp(excludedTransformId.replace(/\*/g, '.*')).test(t.id) === false
)
: true)
)
.map((t) => t.id)
const resultTransformIds = getResultsTransformIds(
transforms,
includeTransforms,
excludeTransforms
);
resultTransformIds.forEach((transformId) => {