Add new telemetry data from event-log index. (#140943)

* Add new telemetry data from eventLog index.
count_rules_by_execution_status_per_day,
count_connector_types_by_action_run_outcome_per_day,
This commit is contained in:
Ersin Erdal 2022-09-20 23:43:00 +02:00 committed by GitHub
parent 50b3381e75
commit 17a25b8230
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 300 additions and 4 deletions

View file

@ -721,6 +721,36 @@ Object {
},
},
},
count_connector_types_by_action_run_outcome_per_day: {
actionSavedObjects: {
connector_types: {
buckets: [
{
key: '.slack',
outcome: {
count: {
buckets: [
{ key: 'success', doc_count: 12 },
{ key: 'failure', doc_count: 1 },
],
},
},
},
{
key: '.email',
outcome: {
count: {
buckets: [
{ key: 'success', doc_count: 13 },
{ key: 'failure', doc_count: 2 },
],
},
},
},
],
},
},
},
},
}
);
@ -754,6 +784,16 @@ Object {
__slack: 7,
},
countTotal: 120,
countRunOutcomeByConnectorType: {
__email: {
failure: 2,
success: 13,
},
__slack: {
failure: 1,
success: 12,
},
},
hasErrors: false,
});
});
@ -775,6 +815,7 @@ Object {
"countByType": Object {},
"countFailed": 0,
"countFailedByType": Object {},
"countRunOutcomeByConnectorType": Object {},
"countTotal": 0,
"errorMessage": "oh no",
"hasErrors": true,

View file

@ -7,6 +7,11 @@
import { QueryDslQueryContainer } from '@elastic/elasticsearch/lib/api/types';
import { ElasticsearchClient, Logger } from '@kbn/core/server';
import { AggregationsTermsAggregateBase } from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import {
AvgActionRunOutcomeByConnectorTypeBucket,
parseActionRunOutcomeByConnectorTypesBucket,
} from './lib/parse_connector_type_bucket';
import { AlertHistoryEsIndexConnectorId } from '../../common';
import { ActionResult, PreConfiguredAction } from '../types';
@ -395,7 +400,7 @@ export async function getInUseTotalCount(
}
}
function replaceFirstAndLastDotSymbols(strToReplace: string) {
export function replaceFirstAndLastDotSymbols(strToReplace: string) {
const hasFirstSymbolDot = strToReplace.startsWith('.');
const appliedString = hasFirstSymbolDot ? strToReplace.replace('.', '__') : strToReplace;
const hasLastSymbolDot = strToReplace.endsWith('.');
@ -415,6 +420,7 @@ export async function getExecutionsPerDayCount(
countFailedByType: Record<string, number>;
avgExecutionTime: number;
avgExecutionTimeByType: Record<string, number>;
countRunOutcomeByConnectorType: Record<string, number>;
}> {
const scriptedMetric = {
scripted_metric: {
@ -536,6 +542,35 @@ export async function getExecutionsPerDayCount(
},
},
},
count_connector_types_by_action_run_outcome_per_day: {
nested: {
path: 'kibana.saved_objects',
},
aggs: {
actionSavedObjects: {
filter: { term: { 'kibana.saved_objects.type': 'action' } },
aggs: {
connector_types: {
terms: {
field: 'kibana.saved_objects.type_id',
},
aggs: {
outcome: {
reverse_nested: {},
aggs: {
count: {
terms: {
field: 'event.outcome',
},
},
},
},
},
},
},
},
},
},
},
},
});
@ -564,6 +599,14 @@ export async function getExecutionsPerDayCount(
{}
);
const aggsCountConnectorTypeByActionRun = actionResults.aggregations as {
count_connector_types_by_action_run_outcome_per_day: {
actionSavedObjects: {
connector_types: AggregationsTermsAggregateBase<AvgActionRunOutcomeByConnectorTypeBucket>;
};
};
};
return {
hasErrors: false,
countTotal: aggsExecutions.total,
@ -586,6 +629,10 @@ export async function getExecutionsPerDayCount(
),
avgExecutionTime: aggsAvgExecutionTime,
avgExecutionTimeByType,
countRunOutcomeByConnectorType: parseActionRunOutcomeByConnectorTypesBucket(
aggsCountConnectorTypeByActionRun.count_connector_types_by_action_run_outcome_per_day
.actionSavedObjects.connector_types.buckets
),
};
} catch (err) {
const errorMessage = err && err.message ? err.message : err.toString();
@ -601,6 +648,7 @@ export async function getExecutionsPerDayCount(
countFailedByType: {},
avgExecutionTime: 0,
avgExecutionTimeByType: {},
countRunOutcomeByConnectorType: {},
};
}
}

View file

@ -47,6 +47,13 @@ export function createActionsUsageCollector(
count_actions_executions_failed_by_type_per_day: byTypeSchema,
avg_execution_time_per_day: { type: 'long' },
avg_execution_time_by_type_per_day: byTypeSchema,
count_connector_types_by_action_run_outcome_per_day: {
DYNAMIC_KEY: {
success: { type: 'long' },
failure: { type: 'long' },
unknown: { type: 'long' },
},
},
},
fetch: async () => {
try {
@ -77,6 +84,7 @@ export function createActionsUsageCollector(
count_actions_executions_failed_by_type_per_day: {},
avg_execution_time_per_day: 0,
avg_execution_time_by_type_per_day: {},
count_connector_types_by_action_run_outcome_per_day: {},
};
}
},

View file

@ -0,0 +1,88 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { parseActionRunOutcomeByConnectorTypesBucket } from './parse_connector_type_bucket';
// Unit tests for parseActionRunOutcomeByConnectorTypesBucket: each case feeds
// connector-type buckets (each carrying nested outcome buckets) and checks the
// resulting { connectorType: { outcome: count } } object.
describe('parseActionRunOutcomeByConnectorTypesBucket', () => {
// Happy path: every bucket is fully populated. Note that the leading dot of
// each connector type key shows up as `__` in the expected result.
test('should correctly parse connector type bucket results', () => {
expect(
parseActionRunOutcomeByConnectorTypesBucket([
{
key: '.server-log',
doc_count: 78,
outcome: {
count: {
buckets: [
{ key: 'success', doc_count: 2 },
{ key: 'failure', doc_count: 1 },
],
},
},
},
{
key: '.index',
doc_count: 42,
outcome: {
count: {
buckets: [
{ key: 'success', doc_count: 3 },
{ key: 'failure', doc_count: 4 },
],
},
},
},
])
).toEqual({
__index: {
failure: 4,
success: 3,
},
'__server-log': {
failure: 1,
success: 2,
},
});
});
// Malformed input: an outcome bucket without `doc_count` is expected to be
// reported as 0, and a connector type without any `buckets` array is expected
// to produce an empty object. The fixtures intentionally violate the bucket
// type, hence the @ts-expect-error directives.
test('should handle missing values', () => {
expect(
parseActionRunOutcomeByConnectorTypesBucket([
{
key: '.server-log',
doc_count: 78,
outcome: {
count: {
// @ts-expect-error
buckets: [{ key: 'success', doc_count: 2 }, { key: 'failure' }],
},
},
},
{
key: '.index',
outcome: {
// @ts-expect-error
count: {},
},
},
])
).toEqual({
'__server-log': {
failure: 0,
success: 2,
},
__index: {},
});
});
// No buckets at all yields an empty result object.
test('should handle empty input', () => {
expect(parseActionRunOutcomeByConnectorTypesBucket([])).toEqual({});
});
// `undefined` input is expected to behave exactly like an empty bucket list.
test('should handle undefined input', () => {
expect(parseActionRunOutcomeByConnectorTypesBucket(undefined)).toEqual({});
});
});

View file

@ -0,0 +1,30 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { AggregationsBuckets } from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import { replaceFirstAndLastDotSymbols } from '../actions_telemetry';
/**
 * One connector-type bucket as consumed by
 * parseActionRunOutcomeByConnectorTypesBucket: a bucket key plus a nested
 * `outcome.count` terms result holding one sub-bucket per run outcome.
 */
export interface AvgActionRunOutcomeByConnectorTypeBucket {
// Bucket key; used (after dot replacement) as the top-level key of the parsed result.
// Presumably the connector type id, e.g. '.slack' or '.server-log' — confirm against the query.
key: string;
doc_count: number; // Not read by parseActionRunOutcomeByConnectorTypesBucket; kept because it may be helpful later.
// Per-outcome sub-buckets: `key` is the outcome name (e.g. 'success', 'failure'),
// `doc_count` the number of documents with that outcome.
outcome: { count: { buckets: Array<{ key: string; doc_count: number }> } };
}
/**
 * Flattens connector-type buckets (each carrying nested per-outcome buckets)
 * into a plain object of the form `{ [connectorType]: { [outcome]: count } }`.
 *
 * Both the connector type key and the outcome key are passed through
 * `replaceFirstAndLastDotSymbols` before being used as property names.
 *
 * @param connectorTypeBuckets Buckets to parse. May be an array or a keyed
 *   record (both are allowed by `AggregationsBuckets`); defaults to an empty
 *   array when omitted or `undefined`.
 * @returns An object keyed by connector type whose values map outcome name to
 *   document count. A missing `doc_count` is reported as 0 and a connector type
 *   without outcome buckets maps to an empty object.
 */
export function parseActionRunOutcomeByConnectorTypesBucket(
  connectorTypeBuckets: AggregationsBuckets<AvgActionRunOutcomeByConnectorTypeBucket> = []
) {
  // `AggregationsBuckets<T>` is `T[] | Record<string, T>`. Normalize to an array
  // instead of asserting the array form, so a keyed response does not blow up on
  // `.reduce`. For keyed buckets the record key stands in for a missing `key`.
  const connectorTypes: AvgActionRunOutcomeByConnectorTypeBucket[] = Array.isArray(
    connectorTypeBuckets
  )
    ? connectorTypeBuckets
    : Object.entries(connectorTypeBuckets).map(([recordKey, bucket]) => ({
        ...bucket,
        key: bucket.key ?? recordKey,
      }));

  // The outer accumulator intentionally starts from an untyped `{}` so the
  // inferred return type of this function stays what it was before.
  return connectorTypes.reduce((acc, connectorType) => {
    const outcomeBuckets = connectorType.outcome?.count?.buckets ?? [];
    // Build the per-outcome counts in one pass rather than re-spreading the
    // accumulator for every bucket; later duplicates of a key still win.
    const countsByOutcome = Object.fromEntries(
      outcomeBuckets.map((bucket) => [
        replaceFirstAndLastDotSymbols(bucket.key),
        // Buckets lacking a doc_count are counted as 0.
        bucket.doc_count || 0,
      ])
    );
    return {
      ...acc,
      [replaceFirstAndLastDotSymbols(connectorType.key)]: countsByOutcome,
    };
  }, {});
}

View file

@ -130,6 +130,8 @@ export function telemetryTaskRunner(
totalExecutionsPerDay.countFailedByType,
avg_execution_time_per_day: totalExecutionsPerDay.avgExecutionTime,
avg_execution_time_by_type_per_day: totalExecutionsPerDay.avgExecutionTimeByType,
count_connector_types_by_action_run_outcome_per_day:
totalExecutionsPerDay.countRunOutcomeByConnectorType,
},
runAt: getNextMidnight(),
};

View file

@ -22,6 +22,7 @@ export interface ActionsUsage {
count_actions_executions_by_type_per_day: Record<string, number>;
count_actions_executions_failed_per_day: number;
count_actions_executions_failed_by_type_per_day: Record<string, number>;
count_connector_types_by_action_run_outcome_per_day: Record<string, Record<string, number>>;
avg_execution_time_per_day: number;
avg_execution_time_by_type_per_day: Record<string, number>;
}

View file

@ -116,6 +116,13 @@ const byStatusSchema: MakeSchemaFrom<AlertingUsage>['count_rules_by_execution_st
warning: { type: 'long' },
};
// Telemetry schema for `count_rules_by_execution_status_per_day`: one `long`
// counter for each of the `success`, `failure` and `unknown` keys.
const byStatusPerDaySchema: MakeSchemaFrom<AlertingUsage>['count_rules_by_execution_status_per_day'] =
{
success: { type: 'long' },
failure: { type: 'long' },
unknown: { type: 'long' },
};
const byNotifyWhenSchema: MakeSchemaFrom<AlertingUsage>['count_rules_by_notify_when'] = {
on_action_group_change: { type: 'long' },
on_active_alert: { type: 'long' },
@ -200,6 +207,7 @@ export function createAlertingUsageCollector(
count_rules_muted: 0,
count_rules_with_muted_alerts: 0,
count_connector_types_by_consumers: {},
count_rules_by_execution_status_per_day: {},
avg_execution_time_per_day: 0,
avg_execution_time_by_type_per_day: {},
avg_es_search_duration_per_day: 0,
@ -283,6 +291,7 @@ export function createAlertingUsageCollector(
count_rules_muted: { type: 'long' },
count_rules_with_muted_alerts: { type: 'long' },
count_connector_types_by_consumers: { DYNAMIC_KEY: { DYNAMIC_KEY: { type: 'long' } } },
count_rules_by_execution_status_per_day: byStatusPerDaySchema,
avg_execution_time_per_day: { type: 'long' },
avg_execution_time_by_type_per_day: byTypeSchema,
avg_es_search_duration_per_day: { type: 'long' },

View file

@ -1291,6 +1291,14 @@ describe('event log telemetry', () => {
avg_total_search_duration: {
value: 28.630434782608695,
},
by_execution_status: {
doc_count_error_upper_bound: 0,
sum_other_doc_count: 0,
buckets: [
{ key: 'success', doc_count: 21 },
{ key: 'failure', doc_count: 22 },
],
},
},
});
@ -1377,7 +1385,6 @@ describe('event log telemetry', () => {
logs__alert__document__count: 0,
},
},
alertsPercentilesByType: {
p50: {
'__index-threshold': 1,
@ -1398,6 +1405,10 @@ describe('event log telemetry', () => {
logs__alert__document__count: 0,
},
},
countRulesByExecutionStatus: {
failure: 22,
success: 21,
},
hasErrors: false,
});
});
@ -1437,6 +1448,7 @@ describe('event log telemetry', () => {
generatedActionsPercentilesByType: {},
alertsPercentiles: {},
alertsPercentilesByType: {},
countRulesByExecutionStatus: {},
});
});
});

View file

@ -54,15 +54,14 @@ interface GetExecutionsPerDayCountResults {
generatedActionsPercentilesByType: Record<string, Record<string, number>>;
alertsPercentiles: Record<string, number>;
alertsPercentilesByType: Record<string, Record<string, number>>;
countRulesByExecutionStatus: Record<string, number>;
}
interface GetExecutionTimeoutsPerDayCountResults {
hasErrors: boolean;
errorMessage?: string;
countExecutionTimeouts: number;
countExecutionTimeoutsByType: Record<string, number>;
}
interface GetExecutionCountsExecutionFailures extends AggregationsSingleBucketAggregateBase {
by_reason: AggregationsTermsAggregateBase<AggregationsStringTermsBucketKeys>;
}
@ -145,6 +144,11 @@ export async function getExecutionsPerDayCount({
},
aggs: eventLogAggs,
},
by_execution_status: {
terms: {
field: 'event.outcome',
},
},
},
},
};
@ -165,6 +169,7 @@ export async function getExecutionsPerDayCount({
avg_execution_time: AggregationsSingleMetricAggregateBase;
avg_es_search_duration: AggregationsSingleMetricAggregateBase;
avg_total_search_duration: AggregationsSingleMetricAggregateBase;
by_execution_status: AggregationsTermsAggregateBase<AggregationsStringTermsBucketKeys>;
};
const aggregationsByRuleTypeId: AggregationsBuckets<GetExecutionCountsAggregationBucket> =
@ -176,6 +181,9 @@ export async function getExecutionsPerDayCount({
...parseExecutionFailureByRuleType(aggregationsByRuleTypeId),
...parseExecutionCountAggregationResults(aggregations),
countTotalRuleExecutions: totalRuleExecutions ?? 0,
countRulesByExecutionStatus: parseSimpleRuleTypeBucket(
aggregations.by_execution_status.buckets
),
};
} catch (err) {
const errorMessage = err && err.message ? err.message : err.toString();
@ -204,6 +212,7 @@ export async function getExecutionsPerDayCount({
generatedActionsPercentilesByType: {},
alertsPercentiles: {},
alertsPercentilesByType: {},
countRulesByExecutionStatus: {},
};
}
}
@ -313,6 +322,14 @@ export async function getExecutionTimeoutsPerDayCount({
* avg_total_search_duration: { // average total search duration across executions
* value: 43.74647887323944,
* },
* by_execution_status: { // document count per `event.outcome` value
* doc_count_error_upper_bound: 0,
* sum_other_doc_count: 0,
* buckets: [
* { key: 'success', doc_count: 48 },
* { key: 'failure', doc_count: 1 },
* ],
* },
* }
*/

View file

@ -161,6 +161,8 @@ export function telemetryTaskRunner(
dailyExecutionCounts.countFailedExecutionsByReason,
count_rules_executions_failured_by_reason_by_type_per_day:
dailyExecutionCounts.countFailedExecutionsByReasonByType,
count_rules_by_execution_status_per_day:
dailyExecutionCounts.countRulesByExecutionStatus,
count_rules_executions_timeouts_per_day:
dailyExecutionTimeoutCounts.countExecutionTimeouts,
count_rules_executions_timeouts_by_type_per_day:

View file

@ -42,6 +42,7 @@ export interface AlertingUsage {
count_rules_snoozed: number;
count_rules_muted: number;
count_rules_with_muted_alerts: number;
count_rules_by_execution_status_per_day: Record<string, number>;
percentile_num_generated_actions_per_day: {
p50: number;
p90: number;

View file

@ -265,6 +265,23 @@
"type": "long"
}
}
},
"count_connector_types_by_action_run_outcome_per_day": {
"properties": {
"DYNAMIC_KEY": {
"properties": {
"success": {
"type": "long"
},
"failure": {
"type": "long"
},
"unknown": {
"type": "long"
}
}
}
}
}
}
},
@ -1693,6 +1710,19 @@
}
}
},
"count_rules_by_execution_status_per_day": {
"properties": {
"success": {
"type": "long"
},
"failure": {
"type": "long"
},
"unknown": {
"type": "long"
}
}
},
"avg_execution_time_per_day": {
"type": "long"
},

View file

@ -246,6 +246,10 @@ export default function createAlertingAndActionsTelemetryTests({ getService }: F
expect(telemetry.count_actions_executions_failed_by_type_per_day['test.throw'] > 0).to.be(
true
);
expect(
telemetry.count_connector_types_by_action_run_outcome_per_day['test.throw'].failure
).to.greaterThan(0);
}
function verifyAlertingTelemetry(telemetry: any) {
@ -528,6 +532,9 @@ export default function createAlertingAndActionsTelemetryTests({ getService }: F
// eslint-disable-next-line @typescript-eslint/naming-convention
alertsFixture: { test__noop: 9, test__throw: 9, __slack: 3 },
});
expect(telemetry.count_rules_by_execution_status_per_day.failure).to.greaterThan(0);
expect(telemetry.count_rules_by_execution_status_per_day.success).to.greaterThan(0);
}
it('should retrieve telemetry data in the expected format', async () => {