[Obs AI Assistant] Improve context for contextual insights components on alert details page (#180766)

This adds an API call to the contextual insights component which should
improve the relevance of the responses by providing more context to the
LLM.

---------

Co-authored-by: kibanamachine <42973632+kibanamachine@users.noreply.github.com>
Co-authored-by: Cauê Marcondes <55978943+cauemarcondes@users.noreply.github.com>
This commit is contained in:
Søren Louv-Jansen 2024-04-16 19:09:04 +02:00 committed by GitHub
parent 91c8270aea
commit abec38e337
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 670 additions and 405 deletions

View file

@ -1,122 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { EuiFlexGroup, EuiFlexItem } from '@elastic/eui';
import {
createFunctionRequestMessage,
createFunctionResponseMessage,
} from '@kbn/observability-ai-assistant-plugin/public';
import React, { useCallback } from 'react';
import dedent from 'dedent';
import { i18n } from '@kbn/i18n';
import { callApmApi } from '../../../../services/rest/create_call_apm_api';
import {
SERVICE_ENVIRONMENT,
SERVICE_NAME,
TRANSACTION_NAME,
TRANSACTION_TYPE,
} from '../../../../../common/es_fields/apm';
import { useKibana } from '../../../../context/kibana_context/use_kibana';
import { AlertDetailsAppSectionProps } from './types';
/**
 * Renders the "Help me understand this alert" contextual insight for an APM
 * alert.
 *
 * The prompt messages are produced lazily by `getPromptMessages`: it calls the
 * `get_apm_alert_details_context` route with the service/transaction fields of
 * the alert and the alert start time, then wraps the response in a simulated
 * function request/response message pair.
 *
 * Renders nothing when the AI assistant's contextual insight component is not
 * available from the Kibana services.
 *
 * @param alert - the alert whose fields and start time drive the context lookup.
 */
export function AlertDetailContextualInsights({
  alert,
}: {
  alert: AlertDetailsAppSectionProps['alert'];
}) {
  const {
    services: { observabilityAIAssistant },
  } = useKibana();

  const ObservabilityAIAssistantContextualInsight =
    observabilityAIAssistant?.ObservabilityAIAssistantContextualInsight;

  const getPromptMessages = useCallback(async () => {
    const {
      serviceSummary,
      downstreamDependencies,
      logCategories,
      serviceChangePoints,
      exitSpanChangePoints,
      anomalies,
    } = await callApmApi(
      'GET /internal/apm/assistant/get_apm_alert_details_context',
      {
        signal: null,
        params: {
          query: {
            [SERVICE_NAME]: alert.fields[SERVICE_NAME],
            [SERVICE_ENVIRONMENT]: alert.fields[SERVICE_ENVIRONMENT],
            [TRANSACTION_TYPE]: alert.fields[TRANSACTION_TYPE],
            [TRANSACTION_NAME]: alert.fields[TRANSACTION_NAME],
            alert_started_at: new Date(alert.start).toISOString(),
          },
        },
      }
    );

    const serviceName = alert.fields[SERVICE_NAME];
    const serviceEnvironment = alert.fields[SERVICE_ENVIRONMENT];

    // `dedent` strips the common leading indentation, so the text below can be
    // indented with the code. Every section is serialized with JSON.stringify;
    // interpolating a non-string value directly would stringify objects as
    // "[object Object]" and hide the data from the LLM.
    // NOTE(review): assumes `anomalies` is structured data like the other
    // sections - confirm against the route's response type.
    const content = {
      apmAlertContext: dedent(
        `High level information about the service where the alert occurred. Use this as background but do not repeat this information to the user.
        ${JSON.stringify(serviceSummary)}
        Downstream dependencies from the service "${serviceName}". Problems in these services can negatively affect the performance of "${serviceName}":
        ${JSON.stringify(downstreamDependencies)}
        Significant change points for "${serviceName}". Use this to spot dips or spikes in throughput, latency and failure rate.
        ${JSON.stringify(serviceChangePoints)}
        Significant change points for the dependencies of "${serviceName}". Use this to spot dips or spikes in throughput, latency and failure rate for downstream dependencies:
        ${JSON.stringify(exitSpanChangePoints)}
        Log events occurring around the time of the alert. The log messages can sometimes diagnose the root cause of the alert:
        ${JSON.stringify(logCategories)}
        Anomalies for services running in the environment "${serviceEnvironment}"
        ${JSON.stringify(anomalies)}
        Help the user understand the root cause of the alert by using the above information. Suggest actions the user should take to investigate further.
        `
      ),
    };

    // Present the fetched context to the assistant as if it had requested it
    // through a function call and received this response.
    return [
      createFunctionRequestMessage({
        name: 'get_apm_alert_details_context',
        args: {},
      }).message,
      createFunctionResponseMessage({
        name: 'get_apm_alert_details_context',
        content,
        data: content,
      }).message,
    ];
  }, [alert]);

  if (!ObservabilityAIAssistantContextualInsight) {
    return null;
  }

  return (
    <EuiFlexGroup direction="column" gutterSize="m">
      <EuiFlexItem grow={false}>
        <ObservabilityAIAssistantContextualInsight
          title={i18n.translate(
            'xpack.apm.alertDetailContextualInsights.InsightButtonLabel',
            { defaultMessage: 'Help me understand this alert' }
          )}
          messages={getPromptMessages}
        />
      </EuiFlexItem>
    </EuiFlexGroup>
  );
}

View file

@ -1,240 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import type { ScopedAnnotationsClient } from '@kbn/observability-plugin/server';
import type { ElasticsearchClient } from '@kbn/core-elasticsearch-server';
import type { CoreRequestHandlerContext, Logger } from '@kbn/core/server';
import moment from 'moment';
import * as t from 'io-ts';
import { LatencyAggregationType } from '../../../../common/latency_aggregation_types';
import type { MlClient } from '../../../lib/helpers/get_ml_client';
import type { APMEventClient } from '../../../lib/helpers/create_es_client/create_apm_event_client';
import type { ApmAlertsClient } from '../../../lib/helpers/get_apm_alerts_client';
import { getApmServiceSummary } from '../get_apm_service_summary';
import { getAssistantDownstreamDependencies } from '../get_apm_downstream_dependencies';
import { getLogCategories } from '../get_log_categories';
import {
ApmTimeseriesType,
getApmTimeseries,
TimeseriesChangePoint,
} from '../get_apm_timeseries';
import { getAnomalies } from '../get_apm_service_summary/get_anomalies';
/**
 * io-ts codec describing the query accepted when requesting APM alert
 * details context.
 *
 * `service.name` and `alert_started_at` are required; the remaining fields
 * are optional and are forwarded as filters when present.
 */
export const apmAlertDetailsContextRt = t.intersection([
// Required fields.
t.type({
'service.name': t.string,
alert_started_at: t.string,
}),
// Optional fields.
t.partial({
'service.environment': t.string,
'transaction.type': t.string,
'transaction.name': t.string,
// alert fields
'host.name': t.string,
'container.id': t.string,
}),
]);
/**
 * Collects APM context for an alert, looking back from `alertStartedAt`.
 *
 * Six lookups are started together and awaited with `Promise.all`:
 * the service summary, downstream dependencies and log categories (5 minute
 * window), service timeseries (12 hour window), exit span timeseries
 * (30 minute window) and anomalies (1 hour window). Every window ends at
 * `alertStartedAt`.
 *
 * @returns the six results; the two timeseries results are reduced to
 *   `{ title, grouping, changes }` entries.
 */
export async function getApmAlertDetailsContext({
  coreContext,
  alertStartedAt,
  annotationsClient,
  apmAlertsClient,
  apmEventClient,
  esClient,
  logger,
  mlClient,
  query,
}: {
  coreContext: CoreRequestHandlerContext;
  annotationsClient?: ScopedAnnotationsClient;
  apmAlertsClient: ApmAlertsClient;
  alertStartedAt: string;
  apmEventClient: APMEventClient;
  esClient: ElasticsearchClient;
  logger: Logger;
  mlClient?: MlClient;
  query: t.TypeOf<typeof apmAlertDetailsContextRt>;
}) {
  type ChangePointSummary = {
    title: string;
    grouping: string;
    changes: TimeseriesChangePoint[];
  };

  // Filters shared by several of the lookups below.
  const serviceFilter = {
    'service.name': query['service.name'],
    'service.environment': query['service.environment'],
  };
  const transactionFilter = {
    'transaction.type': query['transaction.type'],
    'transaction.name': query['transaction.name'],
  };

  const end = alertStartedAt;
  const fiveMinutesBefore = moment(alertStartedAt)
    .subtract(5, 'minute')
    .toISOString();

  const [
    serviceSummary,
    downstreamDependencies,
    logCategories,
    serviceTimeseries,
    exitSpanTimeseries,
    anomalies,
  ] = await Promise.all([
    getApmServiceSummary({
      apmEventClient,
      annotationsClient,
      esClient,
      apmAlertsClient,
      mlClient,
      logger,
      arguments: { ...serviceFilter, start: fiveMinutesBefore, end },
    }),
    getAssistantDownstreamDependencies({
      apmEventClient,
      arguments: { ...serviceFilter, start: fiveMinutesBefore, end },
    }),
    getLogCategories({
      esClient,
      coreContext,
      arguments: {
        start: fiveMinutesBefore,
        end,
        'service.name': query['service.name'],
        'host.name': query['host.name'],
        'container.id': query['container.id'],
      },
    }),
    getApmTimeseries({
      apmEventClient,
      arguments: {
        start: moment(alertStartedAt).subtract(12, 'hours').toISOString(),
        end,
        stats: [
          {
            title: 'Latency',
            ...serviceFilter,
            timeseries: {
              name: ApmTimeseriesType.transactionLatency,
              function: LatencyAggregationType.p95,
              ...transactionFilter,
            },
          },
          {
            title: 'Throughput',
            ...serviceFilter,
            timeseries: {
              name: ApmTimeseriesType.transactionThroughput,
              ...transactionFilter,
            },
          },
          {
            title: 'Failure rate',
            ...serviceFilter,
            timeseries: {
              name: ApmTimeseriesType.transactionFailureRate,
              ...transactionFilter,
            },
          },
          {
            title: 'Error events',
            ...serviceFilter,
            timeseries: {
              name: ApmTimeseriesType.errorEventRate,
            },
          },
        ],
      },
    }),
    getApmTimeseries({
      apmEventClient,
      arguments: {
        start: moment(alertStartedAt).subtract(30, 'minute').toISOString(),
        end,
        stats: [
          {
            title: 'Exit span latency',
            ...serviceFilter,
            timeseries: {
              name: ApmTimeseriesType.exitSpanLatency,
            },
          },
          {
            title: 'Exit span failure rate',
            ...serviceFilter,
            timeseries: {
              name: ApmTimeseriesType.exitSpanFailureRate,
            },
          },
        ],
      },
    }),
    getAnomalies({
      start: moment(alertStartedAt).subtract(1, 'hour').valueOf(),
      end: moment(alertStartedAt).valueOf(),
      environment: query['service.environment'],
      mlClient,
      logger,
    }),
  ]);

  // Keep only the stat title, the series id and the detected changes.
  const serviceChangePoints = serviceTimeseries.map(
    ({ stat, id, changes }): ChangePointSummary => ({
      title: stat.title,
      grouping: id,
      changes,
    })
  );
  const exitSpanChangePoints = exitSpanTimeseries.map(
    ({ stat, id, changes }): ChangePointSummary => ({
      title: stat.title,
      grouping: id,
      changes,
    })
  );

  return {
    serviceSummary,
    downstreamDependencies,
    logCategories,
    serviceChangePoints,
    exitSpanChangePoints,
    anomalies,
  };
}

View file

@ -140,17 +140,15 @@ export async function getApmServiceSummary({
apmAlertsClient.search({
size: 100,
track_total_hits: false,
body: {
query: {
bool: {
filter: [
...termQuery(ALERT_RULE_PRODUCER, 'apm'),
...termQuery(ALERT_STATUS, ALERT_STATUS_ACTIVE),
...rangeQuery(start, end),
...termQuery(SERVICE_NAME, serviceName),
...environmentQuery(environment),
],
},
query: {
bool: {
filter: [
...termQuery(ALERT_RULE_PRODUCER, 'apm'),
...termQuery(ALERT_STATUS, ALERT_STATUS_ACTIVE),
...rangeQuery(start, end),
...termQuery(SERVICE_NAME, serviceName),
...environmentQuery(environment),
],
},
},
}),

View file

@ -221,7 +221,9 @@ export async function getApmTimeseries({
end,
unit: statResult.unit,
changes: [
...(changePointType && changePointType !== 'indeterminable'
...(changePointType &&
changePointType !== 'indeterminable' &&
changePointType !== 'stationary'
? [
{
date: statResult.change_point.bucket?.key,

View file

@ -18,7 +18,7 @@ import { getTypedSearch } from '../../../utils/create_typed_es_client';
export type LogCategories =
| Array<{
key: string;
errorCategory: string;
docCount: number;
sampleMessage: string;
}>
@ -79,7 +79,9 @@ export async function getLogCategories({
query,
});
const totalDocCount = hitCountRes.hits.total.value;
const samplingProbability = Math.min(100_000 / totalDocCount, 1);
const rawSamplingProbability = Math.min(100_000 / totalDocCount, 1);
const samplingProbability =
rawSamplingProbability < 0.5 ? rawSamplingProbability : 1;
const categorizedLogsRes = await search({
index,
@ -116,7 +118,7 @@ export async function getLogCategories({
({ doc_count: docCount, key, sample }) => {
const sampleMessage = (sample.hits.hits[0]._source as { message: string })
.message;
return { key: key as string, docCount, sampleMessage };
return { errorCategory: key as string, docCount, sampleMessage };
}
);
}

View file

@ -0,0 +1,126 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import type { ElasticsearchClient } from '@kbn/core-elasticsearch-server';
import { CoreRequestHandlerContext } from '@kbn/core-http-request-handler-context-server';
import { aiAssistantLogsIndexPattern } from '@kbn/observability-ai-assistant-plugin/common';
import {
rangeQuery,
termQuery,
typedSearch,
} from '@kbn/observability-plugin/server/utils/queries';
import * as t from 'io-ts';
import moment from 'moment';
import { ApmDocumentType } from '../../../../common/document_type';
import { APMEventClient } from '../../../lib/helpers/create_es_client/create_apm_event_client';
import { observabilityAlertDetailsContextRt } from '.';
import { RollupInterval } from '../../../../common/rollup';
/**
 * Resolves a container id for the alert.
 *
 * Order of precedence:
 * 1. `container.id` from the query, when present;
 * 2. otherwise, if `service.name` is present, a lookup in traces;
 * 3. otherwise (no trace result), a lookup in logs.
 *
 * Resolves to `undefined` when the query has neither a container id nor a
 * service name.
 */
export async function getContainerIdFromSignals({
  query,
  esClient,
  coreContext,
  apmEventClient,
}: {
  query: t.TypeOf<typeof observabilityAlertDetailsContextRt>;
  esClient: ElasticsearchClient;
  coreContext: CoreRequestHandlerContext;
  apmEventClient: APMEventClient;
}) {
  const containerIdFromQuery = query['container.id'];
  if (containerIdFromQuery) {
    return containerIdFromQuery;
  }

  if (!query['service.name']) {
    return undefined;
  }

  const containerIdFromTraces = await getContainerIdFromTrace({
    query,
    apmEventClient,
  });

  // Only consult the logs when traces did not yield a container id.
  return (
    containerIdFromTraces ||
    getContainerIdFromLogs({ query, esClient, coreContext })
  );
}
/**
 * Finds a `container.id` in the logs for the alert's `service.name`.
 *
 * Searches the index pattern configured under `aiAssistantLogsIndexPattern`
 * (falling back to `logs-*`) for a single document that has a `container.id`
 * and matches the service name, within the 30 minutes before the alert started.
 *
 * @returns the container id of the first hit, or `undefined` if there is none.
 */
async function getContainerIdFromLogs({
  query,
  esClient,
  coreContext,
}: {
  query: t.TypeOf<typeof observabilityAlertDetailsContextRt>;
  esClient: ElasticsearchClient;
  coreContext: CoreRequestHandlerContext;
}) {
  const index =
    (await coreContext.uiSettings.client.get<string>(
      aiAssistantLogsIndexPattern
    )) ?? 'logs-*';

  // Use epoch milliseconds (`valueOf`) rather than epoch seconds (`unix`):
  // seconds read as milliseconds would place the window in January 1970 and
  // the search would match nothing.
  // NOTE(review): assumes `rangeQuery` interprets numeric bounds as epoch
  // milliseconds - confirm against its definition.
  const start = moment(query.alert_started_at)
    .subtract(30, 'minutes')
    .valueOf();
  const end = moment(query.alert_started_at).valueOf();

  const res = await typedSearch<{ container: { id: string } }, any>(esClient, {
    index,
    _source: ['container.id'],
    // A single matching document is enough.
    terminate_after: 1,
    size: 1,
    track_total_hits: false,
    query: {
      bool: {
        filter: [
          { exists: { field: 'container.id' } },
          ...termQuery('service.name', query['service.name']),
          ...rangeQuery(start, end),
        ],
      },
    },
  });

  return res.hits.hits[0]?._source?.container?.id;
}
/**
 * Finds a `container.id` in APM transaction events for the alert's
 * `service.name`.
 *
 * Searches for a single transaction event that has a `container.id` and
 * matches the service name, within the 30 minutes before the alert started.
 *
 * @returns the container id of the first hit, or `undefined` if there is none.
 */
async function getContainerIdFromTrace({
  query,
  apmEventClient,
}: {
  query: t.TypeOf<typeof observabilityAlertDetailsContextRt>;
  apmEventClient: APMEventClient;
}) {
  // Use epoch milliseconds (`valueOf`) rather than epoch seconds (`unix`):
  // seconds read as milliseconds would place the window in January 1970 and
  // the search would match nothing.
  // NOTE(review): assumes `rangeQuery` interprets numeric bounds as epoch
  // milliseconds - confirm against its definition.
  const start = moment(query.alert_started_at)
    .subtract(30, 'minutes')
    .valueOf();
  const end = moment(query.alert_started_at).valueOf();

  const res = await apmEventClient.search('get_container_id', {
    apm: {
      sources: [
        {
          documentType: ApmDocumentType.TransactionEvent,
          rollupInterval: RollupInterval.None,
        },
      ],
    },
    body: {
      _source: ['container.id'],
      // A single matching document is enough.
      terminate_after: 1,
      size: 1,
      track_total_hits: false,
      query: {
        bool: {
          filter: [
            { exists: { field: 'container.id' } },
            ...termQuery('service.name', query['service.name']),
            ...rangeQuery(start, end),
          ],
        },
      },
    },
  });

  return res.hits.hits[0]?._source.container?.id;
}

View file

@ -0,0 +1,126 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import type { ElasticsearchClient } from '@kbn/core-elasticsearch-server';
import { CoreRequestHandlerContext } from '@kbn/core-http-request-handler-context-server';
import { aiAssistantLogsIndexPattern } from '@kbn/observability-ai-assistant-plugin/common';
import {
rangeQuery,
termQuery,
typedSearch,
} from '@kbn/observability-plugin/server/utils/queries';
import * as t from 'io-ts';
import moment from 'moment';
import { ApmDocumentType } from '../../../../common/document_type';
import { APMEventClient } from '../../../lib/helpers/create_es_client/create_apm_event_client';
import { observabilityAlertDetailsContextRt } from '.';
import { RollupInterval } from '../../../../common/rollup';
/**
 * Resolves a service name for the alert.
 *
 * Order of precedence:
 * 1. `service.name` from the query, when present;
 * 2. otherwise, if `container.id` is present, a lookup in traces;
 * 3. otherwise (no trace result), a lookup in logs.
 *
 * Resolves to `undefined` when the query has neither a service name nor a
 * container id.
 */
export async function getServiceNameFromSignals({
  query,
  esClient,
  coreContext,
  apmEventClient,
}: {
  query: t.TypeOf<typeof observabilityAlertDetailsContextRt>;
  esClient: ElasticsearchClient;
  coreContext: CoreRequestHandlerContext;
  apmEventClient: APMEventClient;
}) {
  const serviceNameFromQuery = query['service.name'];
  if (serviceNameFromQuery) {
    return serviceNameFromQuery;
  }

  if (!query['container.id']) {
    return undefined;
  }

  const serviceNameFromTraces = await getServiceNameFromTraces({
    query,
    apmEventClient,
  });

  // Only consult the logs when traces did not yield a service name.
  return (
    serviceNameFromTraces ||
    getServiceNameFromLogs({ query, esClient, coreContext })
  );
}
/**
 * Finds a `service.name` in the logs for the alert's `container.id`.
 *
 * Searches the index pattern configured under `aiAssistantLogsIndexPattern`
 * (falling back to `logs-*`) for a single document that has a `service.name`
 * and matches the container id, within the 30 minutes before the alert started.
 *
 * @returns the service name of the first hit, or `undefined` if there is none.
 */
async function getServiceNameFromLogs({
  query,
  esClient,
  coreContext,
}: {
  query: t.TypeOf<typeof observabilityAlertDetailsContextRt>;
  esClient: ElasticsearchClient;
  coreContext: CoreRequestHandlerContext;
}) {
  const index =
    (await coreContext.uiSettings.client.get<string>(
      aiAssistantLogsIndexPattern
    )) ?? 'logs-*';

  // Use epoch milliseconds (`valueOf`) rather than epoch seconds (`unix`):
  // seconds read as milliseconds would place the window in January 1970 and
  // the search would match nothing.
  // NOTE(review): assumes `rangeQuery` interprets numeric bounds as epoch
  // milliseconds - confirm against its definition.
  const start = moment(query.alert_started_at)
    .subtract(30, 'minutes')
    .valueOf();
  const end = moment(query.alert_started_at).valueOf();

  const res = await typedSearch<{ service: { name: string } }, any>(esClient, {
    index,
    _source: ['service.name'],
    // A single matching document is enough.
    terminate_after: 1,
    size: 1,
    track_total_hits: false,
    query: {
      bool: {
        filter: [
          { exists: { field: 'service.name' } },
          ...termQuery('container.id', query['container.id']),
          ...rangeQuery(start, end),
        ],
      },
    },
  });

  return res.hits.hits[0]?._source?.service?.name;
}
/**
 * Finds a `service.name` in APM transaction events for the alert's
 * `container.id`.
 *
 * Searches for a single transaction event that has a `service.name` and
 * matches the container id, within the 30 minutes before the alert started.
 *
 * @returns the service name of the first hit, or `undefined` if there is none.
 */
async function getServiceNameFromTraces({
  query,
  apmEventClient,
}: {
  query: t.TypeOf<typeof observabilityAlertDetailsContextRt>;
  apmEventClient: APMEventClient;
}) {
  // Use epoch milliseconds (`valueOf`) rather than epoch seconds (`unix`):
  // seconds read as milliseconds would place the window in January 1970 and
  // the search would match nothing.
  // NOTE(review): assumes `rangeQuery` interprets numeric bounds as epoch
  // milliseconds - confirm against its definition.
  const start = moment(query.alert_started_at)
    .subtract(30, 'minutes')
    .valueOf();
  const end = moment(query.alert_started_at).valueOf();

  const res = await apmEventClient.search('get_service_name', {
    apm: {
      sources: [
        {
          documentType: ApmDocumentType.TransactionEvent,
          rollupInterval: RollupInterval.None,
        },
      ],
    },
    body: {
      _source: ['service.name'],
      // A single matching document is enough.
      terminate_after: 1,
      size: 1,
      track_total_hits: false,
      query: {
        bool: {
          filter: [
            { exists: { field: 'service.name' } },
            ...termQuery('container.id', query['container.id']),
            ...rangeQuery(start, end),
          ],
        },
      },
    },
  });

  return res.hits.hits[0]?._source.service.name;
}

View file

@ -0,0 +1,297 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import type { ScopedAnnotationsClient } from '@kbn/observability-plugin/server';
import type { ElasticsearchClient } from '@kbn/core-elasticsearch-server';
import type { CoreRequestHandlerContext, Logger } from '@kbn/core/server';
import moment from 'moment';
import * as t from 'io-ts';
import { LatencyAggregationType } from '../../../../common/latency_aggregation_types';
import type { MlClient } from '../../../lib/helpers/get_ml_client';
import type { APMEventClient } from '../../../lib/helpers/create_es_client/create_apm_event_client';
import type { ApmAlertsClient } from '../../../lib/helpers/get_apm_alerts_client';
import { getApmServiceSummary } from '../get_apm_service_summary';
import { getAssistantDownstreamDependencies } from '../get_apm_downstream_dependencies';
import { getLogCategories } from '../get_log_categories';
import { ApmTimeseriesType, getApmTimeseries } from '../get_apm_timeseries';
import { getAnomalies } from '../get_apm_service_summary/get_anomalies';
import { getServiceNameFromSignals } from './get_service_name_from_signals';
import { getContainerIdFromSignals } from './get_container_id_from_signals';
/**
 * io-ts codec describing the query accepted when requesting observability
 * alert details context.
 *
 * Only `alert_started_at` is required; all APM and infrastructure fields are
 * optional.
 */
export const observabilityAlertDetailsContextRt = t.intersection([
// Required fields.
t.type({
alert_started_at: t.string,
}),
// Optional fields.
t.partial({
// apm fields
'service.name': t.string,
'service.environment': t.string,
'transaction.type': t.string,
'transaction.name': t.string,
// infrastructure fields
'host.name': t.string,
'container.id': t.string,
}),
]);
/**
 * Collects observability context for an alert, looking back from
 * `query.alert_started_at`.
 *
 * The service name and container id are first resolved from the available
 * signals. The service summary and downstream dependencies are only requested
 * when a service name was resolved (they are `undefined` otherwise); log
 * categories, change points and anomalies are always requested. All lookups
 * are started together and awaited with `Promise.all`.
 */
export async function getObservabilityAlertDetailsContext({
  coreContext,
  annotationsClient,
  apmAlertsClient,
  apmEventClient,
  esClient,
  logger,
  mlClient,
  query,
}: {
  coreContext: CoreRequestHandlerContext;
  annotationsClient?: ScopedAnnotationsClient;
  apmAlertsClient: ApmAlertsClient;
  apmEventClient: APMEventClient;
  esClient: ElasticsearchClient;
  logger: Logger;
  mlClient?: MlClient;
  query: t.TypeOf<typeof observabilityAlertDetailsContextRt>;
}) {
  const {
    alert_started_at: alertStartedAt,
    'service.environment': serviceEnvironment,
    'host.name': hostName,
  } = query;

  const signalLookup = { query, esClient, coreContext, apmEventClient };
  const [serviceName, containerId] = await Promise.all([
    getServiceNameFromSignals(signalLookup),
    getContainerIdFromSignals(signalLookup),
  ]);

  // Window shared by the service summary, dependencies and log categories.
  const recentWindow = {
    start: moment(alertStartedAt).subtract(5, 'minute').toISOString(),
    end: alertStartedAt,
  };

  const [
    serviceSummary,
    downstreamDependencies,
    logCategories,
    serviceChangePoints,
    exitSpanChangePoints,
    anomalies,
  ] = await Promise.all([
    serviceName
      ? getApmServiceSummary({
          apmEventClient,
          annotationsClient,
          esClient,
          apmAlertsClient,
          mlClient,
          logger,
          arguments: {
            'service.name': serviceName,
            'service.environment': serviceEnvironment,
            ...recentWindow,
          },
        })
      : undefined,
    serviceName
      ? getAssistantDownstreamDependencies({
          apmEventClient,
          arguments: {
            'service.name': serviceName,
            'service.environment': serviceEnvironment,
            ...recentWindow,
          },
        })
      : undefined,
    getLogCategories({
      esClient,
      coreContext,
      arguments: {
        ...recentWindow,
        'service.name': serviceName,
        'host.name': hostName,
        'container.id': containerId,
      },
    }),
    getServiceChangePoints({
      apmEventClient,
      alertStartedAt,
      serviceName,
      serviceEnvironment,
      transactionType: query['transaction.type'],
      transactionName: query['transaction.name'],
    }),
    getExitSpanChangePoints({
      apmEventClient,
      alertStartedAt,
      serviceName,
      serviceEnvironment,
    }),
    getAnomalies({
      start: moment(alertStartedAt).subtract(1, 'hour').valueOf(),
      end: moment(alertStartedAt).valueOf(),
      environment: serviceEnvironment,
      mlClient,
      logger,
    }),
  ]);

  return {
    serviceSummary,
    downstreamDependencies,
    logCategories,
    serviceChangePoints,
    exitSpanChangePoints,
    anomalies,
  };
}
/**
 * Fetches latency (p95), throughput, failure rate and error event timeseries
 * for a service over the 12 hours before the alert started, and returns the
 * series that have at least one change as `{ title, grouping, changes }`.
 *
 * Resolves to an empty array when no service name is given.
 */
async function getServiceChangePoints({
  apmEventClient,
  alertStartedAt,
  serviceName,
  serviceEnvironment,
  transactionType,
  transactionName,
}: {
  apmEventClient: APMEventClient;
  alertStartedAt: string;
  serviceName: string | undefined;
  serviceEnvironment: string | undefined;
  transactionType: string | undefined;
  transactionName: string | undefined;
}) {
  if (!serviceName) {
    return [];
  }

  // Filters repeated across the individual stats.
  const serviceFilter = {
    'service.name': serviceName,
    'service.environment': serviceEnvironment,
  };
  const transactionFilter = {
    'transaction.type': transactionType,
    'transaction.name': transactionName,
  };

  const allSeries = await getApmTimeseries({
    apmEventClient,
    arguments: {
      start: moment(alertStartedAt).subtract(12, 'hours').toISOString(),
      end: alertStartedAt,
      stats: [
        {
          title: 'Latency',
          ...serviceFilter,
          timeseries: {
            name: ApmTimeseriesType.transactionLatency,
            function: LatencyAggregationType.p95,
            ...transactionFilter,
          },
        },
        {
          title: 'Throughput',
          ...serviceFilter,
          timeseries: {
            name: ApmTimeseriesType.transactionThroughput,
            ...transactionFilter,
          },
        },
        {
          title: 'Failure rate',
          ...serviceFilter,
          timeseries: {
            name: ApmTimeseriesType.transactionFailureRate,
            ...transactionFilter,
          },
        },
        {
          title: 'Error events',
          ...serviceFilter,
          timeseries: {
            name: ApmTimeseriesType.errorEventRate,
          },
        },
      ],
    },
  });

  // Drop series without any change, then keep only the summary fields.
  return allSeries
    .filter(({ changes }) => changes.length > 0)
    .map(({ stat, id, changes }) => ({
      title: stat.title,
      grouping: id,
      changes,
    }));
}
/**
 * Fetches exit span latency and failure rate timeseries for a service over
 * the 30 minutes before the alert started, and returns the series that have
 * at least one change as `{ title, grouping, changes }`.
 *
 * Resolves to an empty array when no service name is given.
 */
async function getExitSpanChangePoints({
  apmEventClient,
  alertStartedAt,
  serviceName,
  serviceEnvironment,
}: {
  apmEventClient: APMEventClient;
  alertStartedAt: string;
  serviceName: string | undefined;
  serviceEnvironment: string | undefined;
}) {
  if (!serviceName) {
    return [];
  }

  // Filter repeated across the individual stats.
  const serviceFilter = {
    'service.name': serviceName,
    'service.environment': serviceEnvironment,
  };

  const allSeries = await getApmTimeseries({
    apmEventClient,
    arguments: {
      start: moment(alertStartedAt).subtract(30, 'minute').toISOString(),
      end: alertStartedAt,
      stats: [
        {
          title: 'Exit span latency',
          ...serviceFilter,
          timeseries: {
            name: ApmTimeseriesType.exitSpanLatency,
          },
        },
        {
          title: 'Exit span failure rate',
          ...serviceFilter,
          timeseries: {
            name: ApmTimeseriesType.exitSpanFailureRate,
          },
        },
      ],
    },
  });

  // Drop series without any change, then keep only the summary fields.
  return allSeries
    .filter(({ changes }) => changes.length > 0)
    .map(({ stat, id, changes }) => ({
      title: stat.title,
      grouping: id,
      changes,
    }));
}

View file

@ -12,9 +12,9 @@ import { getMlClient } from '../../lib/helpers/get_ml_client';
import { getRandomSampler } from '../../lib/helpers/get_random_sampler';
import { createApmServerRoute } from '../apm_routes/create_apm_server_route';
import {
apmAlertDetailsContextRt,
getApmAlertDetailsContext,
} from './get_apm_alert_details_context';
observabilityAlertDetailsContextRt,
getObservabilityAlertDetailsContext,
} from './get_observability_alert_details_context';
import {
downstreamDependenciesRouteRt,
@ -31,20 +31,20 @@ import {
} from './get_apm_timeseries';
import { LogCategories } from './get_log_categories';
const getApmAlertDetailsContextRoute = createApmServerRoute({
endpoint: 'GET /internal/apm/assistant/get_apm_alert_details_context',
const getObservabilityAlertDetailsContextRoute = createApmServerRoute({
endpoint: 'GET /internal/apm/assistant/get_obs_alert_details_context',
options: {
tags: ['access:apm', 'access:ai_assistant'],
},
params: t.type({
query: apmAlertDetailsContextRt,
query: observabilityAlertDetailsContextRt,
}),
handler: async (
resources
): Promise<{
serviceSummary: ServiceSummary;
downstreamDependencies: APMDownstreamDependency[];
serviceSummary?: ServiceSummary;
downstreamDependencies?: APMDownstreamDependency[];
logCategories: LogCategories;
serviceChangePoints: Array<{
title: string;
@ -59,8 +59,6 @@ const getApmAlertDetailsContextRoute = createApmServerRoute({
const { context, request, plugins, logger, params } = resources;
const { query } = params;
const alertStartedAt = query.alert_started_at;
const [
apmEventClient,
annotationsClient,
@ -81,9 +79,8 @@ const getApmAlertDetailsContextRoute = createApmServerRoute({
]);
const esClient = coreContext.elasticsearch.client.asCurrentUser;
return getApmAlertDetailsContext({
return getObservabilityAlertDetailsContext({
coreContext,
alertStartedAt,
annotationsClient,
apmAlertsClient,
apmEventClient,
@ -152,6 +149,6 @@ const getDownstreamDependenciesRoute = createApmServerRoute({
export const assistantRouteRepository = {
...getApmTimeSeriesRoute,
...getApmAlertDetailsContextRoute,
...getObservabilityAlertDetailsContextRoute,
...getDownstreamDependenciesRoute,
};

View file

@ -112,7 +112,8 @@
"@kbn/shared-svg",
"@kbn/shared-ux-utility",
"@kbn/management-settings-components-field-row",
"@kbn/shared-ux-markdown"
"@kbn/shared-ux-markdown",
"@kbn/core-http-request-handler-context-server"
],
"exclude": ["target/**/*"]
}

View file

@ -274,14 +274,10 @@ export function getScreenDescription(alertDetail: AlertData) {
: ''
}
The alert details are:
Use the following alert fields as background information for generating a response. Do not list them as bullet points in the response.
${Object.entries(getRelevantAlertFields(alertDetail))
.map(([key, value]) => `${key}: ${JSON.stringify(value)}`)
.join('\n')}
Do not repeat this information to the user, unless it is relevant for them to know.
Please suggestion root causes if possible.
Suggest next steps for the user to take.
`);
}

View file

@ -7,34 +7,116 @@
import { EuiFlexGroup, EuiFlexItem } from '@elastic/eui';
import React, { useMemo } from 'react';
import React, { useCallback } from 'react';
import { i18n } from '@kbn/i18n';
import dedent from 'dedent';
import { isEmpty } from 'lodash';
import { useKibana } from '../../utils/kibana_react';
import { AlertData } from '../../hooks/use_fetch_alert_detail';
export function AlertDetailContextualInsights({ alert }: { alert: AlertData | null }) {
const {
services: { observabilityAIAssistant },
services: { observabilityAIAssistant, http },
} = useKibana();
const ObservabilityAIAssistantContextualInsight =
observabilityAIAssistant?.ObservabilityAIAssistantContextualInsight;
const messages = useMemo(() => {
if (!observabilityAIAssistant) {
return null;
const getPromptMessages = useCallback(async () => {
const fields = alert?.formatted.fields as Record<string, string> | undefined;
if (!observabilityAIAssistant || !fields || !alert) {
return [];
}
const res = await http.get('/internal/apm/assistant/get_obs_alert_details_context', {
query: {
alert_started_at: new Date(alert.formatted.start).toISOString(),
// service fields
'service.name': fields['service.name'],
'service.environment': fields['service.environment'],
'transaction.type': fields['transaction.type'],
'transaction.name': fields['transaction.name'],
// infra fields
'host.name': fields['host.name'],
'container.id': fields['container.id'],
},
});
const {
serviceSummary,
downstreamDependencies,
logCategories,
serviceChangePoints,
exitSpanChangePoints,
anomalies,
} = res as any;
const serviceName = fields['service.name'];
const serviceEnvironment = fields['service.environment'];
const obsAlertContext = `${
!isEmpty(serviceSummary)
? `Metadata for the service where the alert occurred:
${JSON.stringify(serviceSummary, null, 2)}`
: ''
}
${
!isEmpty(downstreamDependencies)
? `Downstream dependencies from the service "${serviceName}". Problems in these services can negatively affect the performance of "${serviceName}":
${JSON.stringify(downstreamDependencies, null, 2)}`
: ''
}
${
!isEmpty(serviceChangePoints)
? `Significant change points for "${serviceName}". Use this to spot dips and spikes in throughput, latency and failure rate:
${JSON.stringify(serviceChangePoints, null, 2)}`
: ''
}
${
!isEmpty(exitSpanChangePoints)
? `Significant change points for the dependencies of "${serviceName}". Use this to spot dips or spikes in throughput, latency and failure rate for downstream dependencies:
${JSON.stringify(exitSpanChangePoints, null, 2)}`
: ''
}
${
!isEmpty(logCategories)
? `Log events occurring around the time of the alert:
${JSON.stringify(logCategories, null, 2)}`
: ''
}
${
!isEmpty(anomalies)
? `Anomalies for services running in the environment "${serviceEnvironment}":
${anomalies}`
: ''
}
`;
return observabilityAIAssistant.getContextualInsightMessages({
message: `I'm looking at an alert and trying to understand why it was triggered`,
instructions: dedent(
`I'm an SRE. I am looking at an alert that was triggered. I want to understand why it was triggered, what it means, and what I should do next.`
`I'm an SRE. I am looking at an alert that was triggered. I want to understand why it was triggered, what it means, and what I should do next.
The following contextual information is available to help me understand the alert:
${obsAlertContext}
Be brief and to the point.
Do not list the alert details as bullet points.
Do refer to the contextual information provided above when relevant.
Pay specific attention to why the alert happened and what may have contributed to it.
`
),
});
}, [observabilityAIAssistant]);
}, [alert, http, observabilityAIAssistant]);
if (!ObservabilityAIAssistantContextualInsight || !messages) {
if (!ObservabilityAIAssistantContextualInsight) {
return null;
}
@ -46,7 +128,7 @@ export function AlertDetailContextualInsights({ alert }: { alert: AlertData | nu
'xpack.observability.alertDetailContextualInsights.InsightButtonLabel',
{ defaultMessage: 'Help me understand this alert' }
)}
messages={messages}
messages={getPromptMessages}
/>
</EuiFlexItem>
</EuiFlexGroup>