mirror of
https://github.com/elastic/kibana.git
synced 2025-04-24 01:38:56 -04:00
[APM] Transaction Duration Anomaly rule fires alerts for other detector types (#127973)
* adding detector filter * removing console * addressing pr comments * pr changes * fixing * reverting * fixing alerts rules and adding synthtrace sample * renaming file * using unit to check dates * removing console * removing synthtrace scenario * api test * creating api test Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com>
This commit is contained in:
parent
0427952e76
commit
4a76b11431
5 changed files with 202 additions and 23 deletions
|
@ -60,7 +60,7 @@ export function TransactionDurationAnomalyAlertTrigger(props: Props) {
|
|||
...ruleParams,
|
||||
},
|
||||
{
|
||||
windowSize: 15,
|
||||
windowSize: 30,
|
||||
windowUnit: 'm',
|
||||
anomalySeverityType: ANOMALY_SEVERITY.CRITICAL,
|
||||
environment: ENVIRONMENT_ALL.value,
|
||||
|
|
|
@ -57,6 +57,7 @@ const { argv } = yargs(process.argv.slice(2))
|
|||
const { trial, server, runner, grep, inspect } = argv;
|
||||
|
||||
const license = trial ? 'trial' : 'basic';
|
||||
|
||||
console.log(`License: ${license}`);
|
||||
|
||||
let ftrScript = 'functional_tests';
|
||||
|
|
|
@ -4,44 +4,48 @@
|
|||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import { schema } from '@kbn/config-schema';
|
||||
import { compact } from 'lodash';
|
||||
import { ESSearchResponse } from 'src/core/types/elasticsearch';
|
||||
import datemath from '@elastic/datemath';
|
||||
import { QueryDslQueryContainer } from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
|
||||
import { schema } from '@kbn/config-schema';
|
||||
import {
|
||||
ALERT_EVALUATION_THRESHOLD,
|
||||
ALERT_EVALUATION_VALUE,
|
||||
ALERT_SEVERITY,
|
||||
ALERT_REASON,
|
||||
ALERT_SEVERITY,
|
||||
} from '@kbn/rule-data-utils';
|
||||
import { createLifecycleRuleTypeFactory } from '../../../../rule_registry/server';
|
||||
import { ProcessorEvent } from '../../../common/processor_event';
|
||||
import { getSeverity } from '../../../common/anomaly_detection';
|
||||
import {
|
||||
PROCESSOR_EVENT,
|
||||
SERVICE_NAME,
|
||||
TRANSACTION_TYPE,
|
||||
SERVICE_ENVIRONMENT,
|
||||
} from '../../../common/elasticsearch_fieldnames';
|
||||
import { getAlertUrlTransaction } from '../../../common/utils/formatters';
|
||||
import { asMutableArray } from '../../../common/utils/as_mutable_array';
|
||||
import { ANOMALY_SEVERITY } from '../../../common/ml_constants';
|
||||
import { compact } from 'lodash';
|
||||
import { ESSearchResponse } from 'src/core/types/elasticsearch';
|
||||
import { KibanaRequest } from '../../../../../../src/core/server';
|
||||
import { termQuery } from '../../../../observability/server';
|
||||
import { createLifecycleRuleTypeFactory } from '../../../../rule_registry/server';
|
||||
import {
|
||||
AlertType,
|
||||
ALERT_TYPES_CONFIG,
|
||||
ANOMALY_ALERT_SEVERITY_TYPES,
|
||||
formatAnomalyReason,
|
||||
} from '../../../common/alert_types';
|
||||
import { getMLJobs } from '../service_map/get_service_anomalies';
|
||||
import { apmActionVariables } from './action_variables';
|
||||
import { RegisterRuleDependencies } from './register_apm_alerts';
|
||||
import { getSeverity } from '../../../common/anomaly_detection';
|
||||
import {
|
||||
ApmMlDetectorType,
|
||||
getApmMlDetectorIndex,
|
||||
} from '../../../common/anomaly_detection/apm_ml_detectors';
|
||||
import {
|
||||
PROCESSOR_EVENT,
|
||||
SERVICE_ENVIRONMENT,
|
||||
SERVICE_NAME,
|
||||
TRANSACTION_TYPE,
|
||||
} from '../../../common/elasticsearch_fieldnames';
|
||||
import {
|
||||
getEnvironmentEsField,
|
||||
getEnvironmentLabel,
|
||||
} from '../../../common/environment_filter_values';
|
||||
import { termQuery } from '../../../../observability/server';
|
||||
import { ANOMALY_SEVERITY } from '../../../common/ml_constants';
|
||||
import { ProcessorEvent } from '../../../common/processor_event';
|
||||
import { asMutableArray } from '../../../common/utils/as_mutable_array';
|
||||
import { getAlertUrlTransaction } from '../../../common/utils/formatters';
|
||||
import { getMLJobs } from '../service_map/get_service_anomalies';
|
||||
import { apmActionVariables } from './action_variables';
|
||||
import { RegisterRuleDependencies } from './register_apm_alerts';
|
||||
|
||||
const paramsSchema = schema.object({
|
||||
serviceName: schema.maybe(schema.string()),
|
||||
|
@ -130,6 +134,14 @@ export function registerAnomalyAlertType({
|
|||
return {};
|
||||
}
|
||||
|
||||
// The start time must be at least 30 minutes ago. It is computed this way to support rules created before this change, when the default window size was 15 minutes.
|
||||
const startTime = Math.min(
|
||||
datemath.parse('now-30m')!.valueOf(),
|
||||
datemath
|
||||
.parse(`now-${ruleParams.windowSize}${ruleParams.windowUnit}`)
|
||||
?.valueOf() || 0
|
||||
);
|
||||
|
||||
const jobIds = mlJobs.map((job) => job.jobId);
|
||||
const anomalySearchParams = {
|
||||
body: {
|
||||
|
@ -143,13 +155,17 @@ export function registerAnomalyAlertType({
|
|||
{
|
||||
range: {
|
||||
timestamp: {
|
||||
gte: `now-${ruleParams.windowSize}${ruleParams.windowUnit}`,
|
||||
gte: startTime,
|
||||
format: 'epoch_millis',
|
||||
},
|
||||
},
|
||||
},
|
||||
...termQuery('partition_field_value', ruleParams.serviceName),
|
||||
...termQuery('by_field_value', ruleParams.transactionType),
|
||||
...termQuery(
|
||||
'detector_index',
|
||||
getApmMlDetectorIndex(ApmMlDetectorType.txLatency)
|
||||
),
|
||||
] as QueryDslQueryContainer[],
|
||||
},
|
||||
},
|
||||
|
|
|
@ -0,0 +1,106 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
import { apm, timerange } from '@elastic/apm-synthtrace';
|
||||
import datemath from '@elastic/datemath';
|
||||
import expect from '@kbn/expect';
|
||||
import { range } from 'lodash';
|
||||
import { AlertType } from '../../../../plugins/apm/common/alert_types';
|
||||
import { FtrProviderContext } from '../../common/ftr_provider_context';
|
||||
import { createAndRunApmMlJob } from '../../common/utils/create_and_run_apm_ml_job';
|
||||
import { waitForRuleStatus } from './wait_for_rule_status';
|
||||
|
||||
/**
 * API test for the APM anomaly rule type (`AlertType.Anomaly`).
 *
 * Indexes two weeks of synthetic `service-a` transactions whose final two
 * hours are slower, more frequent and failing, runs the APM ML job for the
 * `production` environment, creates an anomaly rule through the alerting API
 * and finally waits for the rule's execution status to become `active`.
 */
export default function ApiTest({ getService }: FtrProviderContext) {
  const registry = getService('registry');
  const synthtraceEsClient = getService('synthtraceEsClient');
  const ml = getService('ml');
  const supertest = getService('supertest');
  const log = getService('log');

  registry.when(
    'fetching service anomalies with a trial license',
    { config: 'trial', archives: ['apm_mappings_only_8.0.0'] },
    () => {
      // Window (last two hours) in which the generated traffic misbehaves.
      const spikeFrom = datemath.parse('now-2h')!.valueOf();
      const spikeTo = datemath.parse('now')!.valueOf();

      // Full range of generated data (last two weeks).
      const dataFrom = datemath.parse('now-2w')!.valueOf();
      const dataTo = datemath.parse('now')!.valueOf();

      const NORMAL_DURATION = 100;
      const NORMAL_RATE = 1;

      // Assigned in `before`, read by the test and by the cleanup in `after`.
      let ruleId: string | undefined;

      before(async () => {
        const serviceA = apm.service('service-a', 'production', 'java').instance('a');

        const events = timerange(new Date(dataFrom).getTime(), new Date(dataTo).getTime())
          .interval('1m')
          .rate(1)
          .spans((timestamp) => {
            // Start from the baseline and override everything inside the spike window.
            let count = NORMAL_RATE;
            let duration = NORMAL_DURATION;
            let outcome: 'success' | 'failure' = 'success';

            if (timestamp >= spikeFrom && timestamp < spikeTo) {
              count = 4;
              duration = 1000;
              outcome = 'failure';
            }

            return range(0, count).flatMap(() =>
              serviceA
                .transaction('tx', 'request')
                .timestamp(timestamp)
                .duration(duration)
                .outcome(outcome)
                .serialize()
            );
          });

        await synthtraceEsClient.index(events);
        await createAndRunApmMlJob({ environment: 'production', ml });

        const rulePayload = {
          params: {
            environment: 'production',
            serviceName: 'service-a',
            transactionType: 'request',
            windowSize: 30,
            windowUnit: 'm',
            anomalySeverityType: 'warning',
          },
          consumer: 'apm',
          schedule: {
            interval: '1m',
          },
          tags: ['apm', 'service.name:service-a'],
          name: 'Latency anomaly | service-a',
          rule_type_id: AlertType.Anomaly,
          notify_when: 'onActiveAlert',
          actions: [],
        };

        const createResponse = await supertest
          .post(`/api/alerting/rule`)
          .set('kbn-xsrf', 'foo')
          .send(rulePayload);

        ruleId = createResponse.body.id;
      });

      after(async () => {
        await synthtraceEsClient.clean();
        await ml.cleanMlIndices();
        await supertest.delete(`/api/alerting/rule/${ruleId}`).set('kbn-xsrf', 'foo');
      });

      it('checks if alert is active', async () => {
        const { status } = await waitForRuleStatus({
          id: ruleId,
          expectedStatus: 'active',
          supertest,
          log,
        });

        expect(status).to.be('active');
      });
    }
  );
}
|
|
@ -0,0 +1,56 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
import { ToolingLog } from '@kbn/dev-utils';
|
||||
import expect from '@kbn/expect';
|
||||
import type SuperTest from 'supertest';
|
||||
|
||||
// Value passed to delay() between polls of the rule status; the same amount is
// subtracted from the remaining wait budget (waitMillis) after each attempt.
const WAIT_FOR_STATUS_INCREMENT = 500;
|
||||
|
||||
/**
 * Polls `GET /api/alerting/rule/{id}` until the rule's
 * `execution_status.status` equals `expectedStatus`.
 *
 * @param id - Rule id to poll; when missing the wait fails immediately.
 * @param expectedStatus - Status value to wait for.
 * @param waitMillis - Wait budget, reduced by `WAIT_FOR_STATUS_INCREMENT`
 *   after every unsuccessful attempt (time spent in the HTTP request itself
 *   is not deducted). Defaults to 10000.
 * @param supertest - Client used to issue the GET requests.
 * @param log - Logger that receives a debug line before each retry.
 * @returns The rule's `execution_status` object once the status matches.
 *
 * Fails through `expect().fail` when the budget drops below zero or no id was
 * given, and through `expect(...).to.eql(200)` when a response is not a 200.
 */
export async function waitForRuleStatus({
  id,
  expectedStatus,
  waitMillis = 10000,
  supertest,
  log,
}: {
  expectedStatus: string;
  supertest: SuperTest.SuperTest<SuperTest.Test>;
  log: ToolingLog;
  waitMillis?: number;
  id?: string;
}): Promise<Record<string, any>> {
  let remainingMillis = waitMillis;

  for (;;) {
    if (remainingMillis < 0 || !id) {
      expect().fail(`waiting for alert ${id} status ${expectedStatus} timed out`);
    }

    const response = await supertest.get(`/api/alerting/rule/${id}`);
    expect(response.status).to.eql(200);

    const executionStatus = response.body?.execution_status;
    const currentStatus = executionStatus?.status;

    if (currentStatus === expectedStatus) {
      return executionStatus;
    }

    log.debug(
      `waitForStatus(${expectedStatus}): got ${JSON.stringify(executionStatus)}, retrying`
    );

    await delay(WAIT_FOR_STATUS_INCREMENT);
    remainingMillis -= WAIT_FOR_STATUS_INCREMENT;
  }
}
|
||||
|
||||
/**
 * Returns a promise that resolves (with no value) once a `setTimeout`
 * scheduled for `millis` milliseconds fires.
 */
function delay(millis: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, millis);
  });
}
|
Loading…
Add table
Add a link
Reference in a new issue