[RCA] AI-assisted root cause analysis (#197200)
Implements an LLM-based root cause analysis process. At a high level, it works by investigating entities - which means pulling in alerts, SLOs, and log patterns. From there, it can inspect related entities to get to the root cause.

The backend implementation lives in `x-pack/packages/observability_utils-*` (`service_rca`). It can be imported into any server-side plugin and executed from there. The UI changes are mostly contained to `x-pack/plugins/observability_solution/observability_ai_assistant_app`. This plugin now exports a `RootCauseAnalysisContainer` which takes a stream of data that is returned by the root cause analysis process.

The current implementation lives in the Investigate app. There, it calls its own endpoint that kicks off the RCA process, and feeds it into the `RootCauseAnalysisContainer` exposed by the Observability AI Assistant app plugin. I've left it in a route there so the investigation itself can be updated as the process runs - this would allow the user to close the browser, come back later, and see a full investigation.

> [!NOTE]
> Notes for reviewing teams
>
> @kbn/es-types:
> - support both types and typesWithBodyKey
> - simplify KeysOfSources type
>
> @kbn/server-route-repository:
> - abortable streamed responses
>
> @kbn/sse-utils*:
> - abortable streamed responses
> - serialize errors in a specific format for more reliable re-hydration of errors
> - keep the connection open with SSE comments
>
> @kbn/inference-*:
> - export *Of variants of types, for easier manual inference
> - add automated retries for the `output` API
> - add `name` to tool responses for type inference (get the type of a tool response via its tool name)
> - add `data` to tool responses for transporting internal data (not sent to the LLM)
> - simplify `chunksIntoMessage`
> - allow consumers of the nlToEsql task to add to the `system` prompt
> - add toolCallId to the validation error message
>
> @kbn/aiops*:
> - export `categorizationAnalyzer` for use in observability-ai*
>
> @kbn/observability-ai-assistant*:
> - configurable limit (tokens or doc count) for knowledge base recall
>
> @kbn/slo*:
> - export a client that returns summary indices

---------

Co-authored-by: kibanamachine <42973632+kibanamachine@users.noreply.github.com>
Co-authored-by: Maryam Saeidi <maryam.saeidi@elastic.co>
Co-authored-by: Bena Kansara <bena.kansara@elastic.co>
parent 64e97285fb · commit fa1998ce92
144 changed files with 27287 additions and 358 deletions
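To make the streaming flow concrete, here is a minimal sketch of how a server-side route could kick off the process and pipe it to the browser. This is not code from the PR: the route handler shape, client wiring, and import paths are assumptions, while the `runRootCauseAnalysis` options and the `observableIntoEventSourceStream(source$, { logger, signal })` signature match the diff below.

```ts
import { map } from 'rxjs';
// Assumed import paths; the diff only shows package-relative files.
import { runRootCauseAnalysis } from '@kbn/observability-ai-server/root_cause_analysis';
import { observableIntoEventSourceStream } from '@kbn/sse-utils-server';
import { ServerSentEventType } from '@kbn/sse-utils/src/events';

// Hypothetical route handler: how the clients are obtained is elided.
function rcaRouteHandler({ request, response, logger, clients }: any) {
  // Cancel the analysis when the HTTP request is aborted, mirroring the
  // AbortController wiring added to @kbn/server-route-repository below.
  const controller = new AbortController();
  request.events.aborted$.subscribe(() => controller.abort());

  const events$ = runRootCauseAnalysis({
    serviceName: 'checkout-service', // example entity to investigate
    start: Date.now() - 60 * 60 * 1000,
    end: Date.now(),
    context: 'Burn rate alert fired for the checkout latency SLO', // example
    connectorId: 'my-genai-connector', // example connector id
    spaceId: 'default',
    indices: { logs: ['logs-*'], traces: ['traces-*'], sloSummaries: ['.slo-*'] }, // examples
    logger,
    ...clients, // esClient, alertsClient, rulesClient, inferenceClient, observabilityAIAssistantClient
  });

  // Wrap each RootCauseAnalysisEvent in an SSE data event. The browser feeds
  // this stream into the RootCauseAnalysisContainer exported by the
  // Observability AI Assistant app plugin.
  return response.ok({
    body: observableIntoEventSourceStream(
      events$.pipe(map((event) => ({ type: ServerSentEventType.data as const, data: { event } }))),
      { logger, signal: controller.signal }
    ),
  });
}
```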
@@ -919,6 +919,7 @@ module.exports = {
         'x-pack/plugins/observability_solution/exploratory_view/**/*.{js,mjs,ts,tsx}',
         'x-pack/plugins/observability_solution/ux/**/*.{js,mjs,ts,tsx}',
         'x-pack/plugins/observability_solution/slo/**/*.{js,mjs,ts,tsx}',
+        'x-pack/packages/observability/**/*.{js,mjs,ts,tsx}',
       ],
       rules: {
         'no-console': ['warn', { allow: ['error'] }],

@@ -938,6 +939,7 @@ module.exports = {
         'x-pack/plugins/observability_solution/observability/**/*.stories.*',
         'x-pack/plugins/observability_solution/exploratory_view/**/*.stories.*',
         'x-pack/plugins/observability_solution/slo/**/*.stories.*',
+        'x-pack/packages/observability/**/*.{js,mjs,ts,tsx}',
       ],
       rules: {
         'react/function-component-definition': [
.github/CODEOWNERS (vendored)

@@ -800,6 +800,8 @@ x-pack/packages/observability/alerting_rule_utils @elastic/obs-ux-management-tea
 x-pack/packages/observability/alerting_test_data @elastic/obs-ux-management-team
 x-pack/packages/observability/get_padded_alert_time_range_util @elastic/obs-ux-management-team
 x-pack/packages/observability/logs_overview @elastic/obs-ux-logs-team
+x-pack/packages/observability/observability_ai/observability_ai_common @elastic/obs-ai-assistant
+x-pack/packages/observability/observability_ai/observability_ai_server @elastic/obs-ai-assistant
 x-pack/packages/observability/observability_utils/observability_utils_browser @elastic/observability-ui
 x-pack/packages/observability/observability_utils/observability_utils_common @elastic/observability-ui
 x-pack/packages/observability/observability_utils/observability_utils_server @elastic/observability-ui
@@ -695,6 +695,8 @@
     "@kbn/observability-ai-assistant-app-plugin": "link:x-pack/plugins/observability_solution/observability_ai_assistant_app",
     "@kbn/observability-ai-assistant-management-plugin": "link:x-pack/plugins/observability_solution/observability_ai_assistant_management",
     "@kbn/observability-ai-assistant-plugin": "link:x-pack/plugins/observability_solution/observability_ai_assistant",
+    "@kbn/observability-ai-common": "link:x-pack/packages/observability/observability_ai/observability_ai_common",
+    "@kbn/observability-ai-server": "link:x-pack/packages/observability/observability_ai/observability_ai_server",
     "@kbn/observability-alert-details": "link:x-pack/packages/observability/alert_details",
     "@kbn/observability-alerting-rule-utils": "link:x-pack/packages/observability/alerting_rule_utils",
     "@kbn/observability-alerting-test-data": "link:x-pack/packages/observability/alerting_test_data",

@@ -1145,6 +1147,7 @@
    "fnv-plus": "^1.3.1",
    "formik": "^2.4.6",
    "fp-ts": "^2.3.1",
    "fuse.js": "^7.0.0",
    "get-port": "^5.0.0",
    "getopts": "^2.2.5",
    "getos": "^3.1.0",
@@ -13,6 +13,7 @@ export type {
   SearchHit,
   ESSearchResponse,
   ESSearchRequest,
+  ESSearchRequestWithoutBody,
   ESSourceOptions,
   InferSearchResponseOf,
   AggregationResultOf,
@@ -8,6 +8,7 @@
  */

 import * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
+import * as estypesWithoutBody from '@elastic/elasticsearch/lib/api/types';
 import type {
   Field,
   QueryDslFieldAndFormat,

@@ -26,6 +27,7 @@ import {

 export type ESFilter = estypes.QueryDslQueryContainer;
 export type ESSearchRequest = estypes.SearchRequest;
+export type ESSearchRequestWithoutBody = estypesWithoutBody.SearchRequest;
 export type AggregationOptionsByType = Required<estypes.AggregationsAggregationContainer>;

 // Typings for Elasticsearch queries and aggregations. These are intended to be
@@ -23,20 +23,15 @@ type InvalidAggregationRequest = unknown;
 // Union keys are not included in keyof, but extends iterates over the types in a union.
 type ValidAggregationKeysOf<T extends Record<string, any>> = T extends T ? keyof T : never;

-type KeyOfSource<T> = Record<
-  keyof T,
-  (T extends Record<string, { terms: { missing_bucket: true } }> ? null : never) | string | number
->;
+type KeyOfSource<T> = {
+  [key in keyof T]:
+    | (T[key] extends Record<string, { terms: { missing_bucket: true } }> ? null : never)
+    | string
+    | number;
+};

-type KeysOfSources<T extends any[]> = T extends [any]
-  ? KeyOfSource<T[0]>
-  : T extends [any, any]
-  ? KeyOfSource<T[0]> & KeyOfSource<T[1]>
-  : T extends [any, any, any]
-  ? KeyOfSource<T[0]> & KeyOfSource<T[1]> & KeyOfSource<T[2]>
-  : T extends [any, any, any, any]
-  ? KeyOfSource<T[0]> & KeyOfSource<T[1]> & KeyOfSource<T[2]> & KeyOfSource<T[3]>
-  : Record<string, null | string | number>;
+// convert to intersection to be able to get all the keys
+type KeysOfSources<T extends any[]> = UnionToIntersection<KeyOfSource<ValuesType<Pick<T, number>>>>;

 type CompositeKeysOf<TAggregationContainer extends AggregationsAggregationContainer> =
   TAggregationContainer extends {
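The replacement drops the hard-coded tuple arities in favor of a distributive union-to-intersection conversion. A rough standalone illustration of the trick, using the conventional helper definitions (not necessarily the ones this file imports - `ValuesType` comes from the utility-types package in Kibana):

```ts
// Classic union-to-intersection helper via contravariant inference.
type UnionToIntersection<U> = (U extends any ? (arg: U) => void : never) extends (
  arg: infer I
) => void
  ? I
  : never;

// Indexing a tuple with `number` yields the union of its element types,
// which the helper then merges into a single intersection:
type Sources = [{ a: string }, { b: string }];
type Elements = Sources[number]; // { a: string } | { b: string }
type Merged = UnionToIntersection<Elements>; // { a: string } & { b: string }
```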
@@ -24,6 +24,9 @@ const updateInvestigationParamsSchema = z.object({
       }),
       tags: z.array(z.string()),
       externalIncidentUrl: z.string().nullable(),
+      rootCauseAnalysis: z.object({
+        events: z.array(z.any()),
+      }),
     })
     .partial(),
 });
@@ -35,6 +35,11 @@ const investigationSchema = z.object({
   notes: z.array(investigationNoteSchema),
   items: z.array(investigationItemSchema),
   externalIncidentUrl: z.string().nullable(),
+  rootCauseAnalysis: z
+    .object({
+      events: z.array(z.any()),
+    })
+    .optional(),
 });

 type Status = z.infer<typeof statusSchema>;
@@ -98,8 +98,15 @@ export function registerRoutes<TDependencies extends Record<string, any>>({
       if (isKibanaResponse(result)) {
         return result;
       } else if (isObservable(result)) {
+        const controller = new AbortController();
+        request.events.aborted$.subscribe(() => {
+          controller.abort();
+        });
         return response.ok({
-          body: observableIntoEventSourceStream(result as Observable<ServerSentEvent>),
+          body: observableIntoEventSourceStream(result as Observable<ServerSentEvent>, {
+            logger,
+            signal: controller.signal,
+          }),
         });
       } else {
         const body = result || {};
@@ -0,0 +1,198 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the "Elastic License
 * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
 * Public License v 1"; you may not use this file except in compliance with, at
 * your election, the "Elastic License 2.0", the "GNU Affero General Public
 * License v3.0 only", or the "Server Side Public License, v 1".
 */

import { Logger } from '@kbn/logging';
import { observableIntoEventSourceStream } from './observable_into_event_source_stream';
import { PassThrough } from 'node:stream';
import { Subject } from 'rxjs';
import { ServerSentEvent, ServerSentEventType } from '@kbn/sse-utils/src/events';
import {
  ServerSentEventErrorCode,
  createSSEInternalError,
  createSSERequestError,
} from '@kbn/sse-utils/src/errors';

describe('observableIntoEventSourceStream', () => {
  let logger: jest.Mocked<Logger>;

  let controller: AbortController;

  let stream: PassThrough;
  let source$: Subject<ServerSentEvent>;

  let data: string[];

  beforeEach(() => {
    jest.useFakeTimers();
    logger = {
      debug: jest.fn(),
      error: jest.fn(),
    } as unknown as jest.Mocked<Logger>;

    controller = new AbortController();
    source$ = new Subject();
    data = [];

    stream = observableIntoEventSourceStream(source$, { logger, signal: controller.signal });
    stream.on('data', (chunk) => {
      data.push(chunk.toString());
    });
  });

  afterEach(() => {
    jest.clearAllTimers();
  });

  it('writes events into the stream in SSE format', () => {
    source$.next({ type: ServerSentEventType.data, data: { foo: 'bar' } });
    source$.complete();

    jest.runAllTimers();

    expect(data).toEqual(['event: data\ndata: {"data":{"foo":"bar"}}\n\n']);
  });

  it('handles SSE errors', () => {
    const sseError = createSSEInternalError('Invalid input');

    source$.error(sseError);

    jest.runAllTimers();

    expect(logger.error).toHaveBeenCalledWith(sseError);
    expect(logger.debug).toHaveBeenCalled();
    const debugFn = logger.debug.mock.calls[0][0] as () => string;
    const loggedError = JSON.parse(debugFn());
    expect(loggedError).toEqual({
      type: 'error',
      error: {
        code: ServerSentEventErrorCode.internalError,
        message: 'Invalid input',
        meta: {},
      },
    });

    expect(data).toEqual([
      `event: error\ndata: ${JSON.stringify({
        error: {
          code: ServerSentEventErrorCode.internalError,
          message: 'Invalid input',
          meta: {},
        },
      })}\n\n`,
    ]);
  });

  it('handles SSE errors with metadata', () => {
    const sseError = createSSERequestError('Invalid request', 400);

    source$.error(sseError);

    jest.runAllTimers();

    expect(logger.error).toHaveBeenCalledWith(sseError);
    expect(logger.debug).toHaveBeenCalled();
    const debugFn = logger.debug.mock.calls[0][0] as () => string;
    const loggedError = JSON.parse(debugFn());
    expect(loggedError).toEqual({
      type: 'error',
      error: {
        code: ServerSentEventErrorCode.requestError,
        message: 'Invalid request',
        meta: {
          status: 400,
        },
      },
    });

    expect(data).toEqual([
      `event: error\ndata: ${JSON.stringify({
        error: {
          code: ServerSentEventErrorCode.requestError,
          message: 'Invalid request',
          meta: {
            status: 400,
          },
        },
      })}\n\n`,
    ]);
  });

  it('handles non-SSE errors', () => {
    const error = new Error('Non-SSE Error');

    source$.error(error);

    jest.runAllTimers();

    expect(logger.error).toHaveBeenCalledWith(error);
    expect(data).toEqual([
      `event: error\ndata: ${JSON.stringify({
        error: {
          code: ServerSentEventErrorCode.internalError,
          message: 'Non-SSE Error',
        },
      })}\n\n`,
    ]);
  });

  it('should send keep-alive comments every 10 seconds', () => {
    jest.advanceTimersByTime(10000);
    expect(data).toContain(': keep-alive');

    jest.advanceTimersByTime(10000);
    expect(data.filter((d) => d === ': keep-alive')).toHaveLength(2);
  });

  describe('without fake timers', () => {
    beforeEach(() => {
      jest.useFakeTimers({ doNotFake: ['nextTick'] });
    });

    it('should end the stream when the observable completes', async () => {
      jest.useFakeTimers({ doNotFake: ['nextTick'] });

      const endSpy = jest.fn();
      stream.on('end', endSpy);

      source$.complete();

      await new Promise((resolve) => process.nextTick(resolve));

      expect(endSpy).toHaveBeenCalled();
    });

    it('should end stream when signal is aborted', async () => {
      const endSpy = jest.fn();
      stream.on('end', endSpy);

      // Emit some data
      source$.next({ type: ServerSentEventType.data, data: { initial: 'data' } });

      // Abort the signal
      controller.abort();

      // Emit more data after abort
      source$.next({ type: ServerSentEventType.data, data: { after: 'abort' } });

      await new Promise((resolve) => process.nextTick(resolve));

      expect(endSpy).toHaveBeenCalled();

      // Data after abort should not be received
      expect(data).toEqual([
        `event: data\ndata: ${JSON.stringify({ data: { initial: 'data' } })}\n\n`,
      ]);
    });

    afterEach(() => {
      jest.useFakeTimers();
    });
  });
});
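The expectations above also pin down the wire format: each event is written as an `event: <type>` line plus a `data: <json>` line, frames are terminated by a blank line, and keep-alives are bare `: keep-alive` comment lines. As a rough illustration of how a client could split and parse these frames (not code from this PR - the `parseFrame` helper is hypothetical, and Kibana's own client-side parsing lives in @kbn/sse-utils):

```ts
// Minimal sketch of a consumer for the frames produced above.
type ParsedFrame = { event: string; data: unknown };

function parseFrame(frame: string): ParsedFrame | undefined {
  if (frame.startsWith(':')) {
    return undefined; // SSE comment, e.g. ': keep-alive' - ignored by parsers
  }
  const eventMatch = frame.match(/^event: (.+)$/m);
  const dataMatch = frame.match(/^data: (.+)$/m);
  if (!eventMatch || !dataMatch) return undefined;
  return { event: eventMatch[1], data: JSON.parse(dataMatch[1]) };
}

// Frames are separated by a blank line ('\n\n'):
const chunk = 'event: data\ndata: {"data":{"foo":"bar"}}\n\n: keep-alive';
const frames = chunk
  .split('\n\n')
  .map(parseFrame)
  .filter((f): f is ParsedFrame => Boolean(f));
// -> [{ event: 'data', data: { data: { foo: 'bar' } } }]
```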
@@ -7,12 +7,51 @@
  * License v3.0 only", or the "Server Side Public License, v 1".
  */

-import { map, Observable } from 'rxjs';
+import { Logger } from '@kbn/logging';
+import {
+  isSSEError,
+  ServerSentErrorEvent,
+  ServerSentEventErrorCode,
+} from '@kbn/sse-utils/src/errors';
+import { ServerSentEvent, ServerSentEventType } from '@kbn/sse-utils/src/events';
+import { catchError, map, Observable, of } from 'rxjs';
 import { PassThrough } from 'stream';
-import { ServerSentEvent } from '@kbn/sse-utils';

-export function observableIntoEventSourceStream(source$: Observable<ServerSentEvent>): PassThrough {
-  const withSerializedEvents$ = source$.pipe(
+export function observableIntoEventSourceStream(
+  source$: Observable<ServerSentEvent>,
+  {
+    logger,
+    signal,
+  }: {
+    logger: Pick<Logger, 'debug' | 'error'>;
+    signal: AbortSignal;
+  }
+) {
+  const withSerializedErrors$ = source$.pipe(
+    catchError((error): Observable<ServerSentErrorEvent> => {
+      if (isSSEError(error)) {
+        logger.error(error);
+        logger.debug(() => JSON.stringify(error));
+        return of({
+          type: ServerSentEventType.error,
+          error: {
+            code: error.code,
+            message: error.message,
+            meta: error.meta,
+          },
+        });
+      }
+
+      logger.error(error);
+
+      return of({
+        type: ServerSentEventType.error,
+        error: {
+          code: ServerSentEventErrorCode.internalError,
+          message: error.message as string,
+        },
+      });
+    }),
     map((event) => {
       const { type, ...rest } = event;
       return `event: ${type}\ndata: ${JSON.stringify(rest)}\n\n`;
@@ -21,18 +60,38 @@ export function observableIntoEventSourceStream(source$: Observable<ServerSentEv

   const stream = new PassThrough();

-  withSerializedEvents$.subscribe({
+  const intervalId = setInterval(() => {
+    // `:` denotes a comment - this is to keep the connection open
+    // it will be ignored by the SSE parser on the client
+    stream.write(': keep-alive');
+  }, 10000);
+
+  const subscription = withSerializedErrors$.subscribe({
     next: (line) => {
       stream.write(line);
     },
     complete: () => {
       stream.end();
+      clearInterval(intervalId);
     },
     error: (error) => {
-      stream.write(`event: error\ndata: ${JSON.stringify(error)}\n\n`);
+      clearInterval(intervalId);
+      stream.write(
+        `event: error\ndata: ${JSON.stringify({
+          error: {
+            code: ServerSentEventErrorCode.internalError,
+            message: error.message,
+          },
+        })}\n\n`
+      );
+      stream.end();
     },
   });
+
+  signal.addEventListener('abort', () => {
+    subscription.unsubscribe();
+    stream.end();
+  });

   return stream;
 }
@@ -15,5 +15,6 @@
   ],
   "kbn_references": [
     "@kbn/sse-utils",
+    "@kbn/logging",
   ]
 }
@@ -21,7 +21,8 @@ function myRequestHandler(
         data: {
           anyData: {},
         },
-      })
+      }),
+      logger
     ),
   });
 }
@@ -1322,6 +1322,10 @@
     "@kbn/observability-ai-assistant-management-plugin/*": ["x-pack/plugins/observability_solution/observability_ai_assistant_management/*"],
     "@kbn/observability-ai-assistant-plugin": ["x-pack/plugins/observability_solution/observability_ai_assistant"],
     "@kbn/observability-ai-assistant-plugin/*": ["x-pack/plugins/observability_solution/observability_ai_assistant/*"],
+    "@kbn/observability-ai-common": ["x-pack/packages/observability/observability_ai/observability_ai_common"],
+    "@kbn/observability-ai-common/*": ["x-pack/packages/observability/observability_ai/observability_ai_common/*"],
+    "@kbn/observability-ai-server": ["x-pack/packages/observability/observability_ai/observability_ai_server"],
+    "@kbn/observability-ai-server/*": ["x-pack/packages/observability/observability_ai/observability_ai_server/*"],
     "@kbn/observability-alert-details": ["x-pack/packages/observability/alert_details"],
     "@kbn/observability-alert-details/*": ["x-pack/packages/observability/alert_details/*"],
     "@kbn/observability-alerting-rule-utils": ["x-pack/packages/observability/alerting_rule_utils"],
@@ -0,0 +1,15 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

module.exports = {
  preset: '@kbn/test',
  rootDir: '../../../../..',
  roots: [
    '<rootDir>/x-pack/packages/observability/observability_ai/observability_ai_common',
    '<rootDir>/x-pack/packages/observability/observability_ai/observability_ai_server',
  ],
};
@@ -0,0 +1,7 @@
{
  "type": "shared-common",
  "id": "@kbn/observability-ai-common",
  "owner": "@elastic/obs-ai-assistant",
  "group": "observability",
  "visibility": "private"
}
@@ -0,0 +1,6 @@
{
  "name": "@kbn/observability-ai-common",
  "private": true,
  "version": "1.0.0",
  "license": "Elastic License 2.0"
}
@@ -0,0 +1,12 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

export {
  RCA_END_PROCESS_TOOL_NAME,
  RCA_INVESTIGATE_ENTITY_TOOL_NAME,
  RCA_OBSERVE_TOOL_NAME,
} from './tool_names';
@@ -0,0 +1,10 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

export const RCA_OBSERVE_TOOL_NAME = 'observe';
export const RCA_END_PROCESS_TOOL_NAME = 'endProcessAndWriteReport';
export const RCA_INVESTIGATE_ENTITY_TOOL_NAME = 'investigateEntity';
@@ -0,0 +1,20 @@
{
  "extends": "../../../../../tsconfig.base.json",
  "compilerOptions": {
    "outDir": "target/types",
    "types": [
      "jest",
      "node",
      "react"
    ]
  },
  "include": [
    "**/*.ts",
    "**/*.tsx",
  ],
  "exclude": [
    "target/**/*"
  ],
  "kbn_references": [
  ]
}
@@ -0,0 +1,12 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

module.exports = {
  preset: '@kbn/test',
  rootDir: '../../../../..',
  roots: ['<rootDir>/x-pack/packages/observability/observability_ai/observability_ai_server'],
};
@@ -0,0 +1,7 @@
{
  "type": "shared-server",
  "id": "@kbn/observability-ai-server",
  "owner": "@elastic/obs-ai-assistant",
  "group": "observability",
  "visibility": "private"
}
@@ -0,0 +1,6 @@
{
  "name": "@kbn/observability-ai-server",
  "private": true,
  "version": "1.0.0",
  "license": "Elastic License 2.0"
}
@@ -0,0 +1,51 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { from, Observable, of, switchMap } from 'rxjs';
import { RCA_END_PROCESS_TOOL_NAME } from '@kbn/observability-ai-common/root_cause_analysis';
import { AssistantMessage, MessageRole } from '@kbn/inference-common';
import { writeFinalReport } from './tasks/write_final_report';
import { EndProcessToolMessage, RootCauseAnalysisContext } from './types';
import { generateSignificantEventsTimeline } from './tasks/generate_timeline';
import { EMPTY_ASSISTANT_MESSAGE } from './empty_assistant_message';

export function callEndRcaProcessTool({
  rcaContext,
  toolCallId,
}: {
  rcaContext: RootCauseAnalysisContext;
  toolCallId: string;
}): Observable<EndProcessToolMessage | AssistantMessage> {
  return from(
    writeFinalReport({
      rcaContext,
    })
  ).pipe(
    switchMap((report) => {
      return from(
        generateSignificantEventsTimeline({
          rcaContext,
          report,
        }).then((timeline) => {
          return { timeline, report };
        })
      );
    }),
    switchMap(({ report, timeline }) => {
      const toolMessage: EndProcessToolMessage = {
        name: RCA_END_PROCESS_TOOL_NAME,
        role: MessageRole.Tool,
        toolCallId,
        response: {
          report,
          timeline,
        },
      };
      return of(toolMessage, EMPTY_ASSISTANT_MESSAGE);
    })
  );
}
@@ -0,0 +1,80 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { from, Observable, of, switchMap } from 'rxjs';
import { MessageRole } from '@kbn/inference-common';
import { RCA_INVESTIGATE_ENTITY_TOOL_NAME } from '@kbn/observability-ai-common/root_cause_analysis';
import { InvestigateEntityToolMessage, RootCauseAnalysisContext, ToolErrorMessage } from './types';
import { investigateEntity } from './tasks/investigate_entity';
import { formatEntity } from './util/format_entity';

export function callInvestigateEntityTool({
  field,
  value,
  context,
  toolCallId,
  rcaContext,
}: {
  field: string;
  value: string;
  context: string;
  toolCallId: string;
  rcaContext: RootCauseAnalysisContext;
}): Observable<InvestigateEntityToolMessage | ToolErrorMessage> {
  const nextEntity = {
    [field]: value,
  };

  return from(
    investigateEntity({
      rcaContext,
      entity: nextEntity,
      context,
    })
  ).pipe(
    switchMap((entityInvestigation) => {
      if (!entityInvestigation) {
        const entityNotFoundToolMessage: ToolErrorMessage = {
          name: 'error',
          role: MessageRole.Tool,
          response: {
            error: {
              message: `Entity ${formatEntity(nextEntity)} not found, have
you verified it exists and that the field and value you are using
are correct?`,
            },
          },
          toolCallId,
        };

        return of(entityNotFoundToolMessage);
      }

      const {
        attachments,
        relatedEntities,
        entity: investigatedEntity,
        summary,
      } = entityInvestigation;
      const toolMessage: InvestigateEntityToolMessage = {
        name: RCA_INVESTIGATE_ENTITY_TOOL_NAME,
        role: MessageRole.Tool as const,
        toolCallId,
        response: {
          entity: investigatedEntity,
          relatedEntities,
          summary,
        },
        data: {
          attachments,
        },
      };

      return of(toolMessage);
    })
  );
}
@@ -0,0 +1,91 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { AssistantMessage, MessageRole } from '@kbn/inference-common';
import {
  RCA_INVESTIGATE_ENTITY_TOOL_NAME,
  RCA_OBSERVE_TOOL_NAME,
} from '@kbn/observability-ai-common/root_cause_analysis';
import { compact, findLast } from 'lodash';
import { from, Observable, of, switchMap } from 'rxjs';
import { observeInvestigationResults } from './tasks/observe_investigation_results';
import {
  InvestigateEntityToolMessage,
  ObservationToolMessage,
  RootCauseAnalysisContext,
  RootCauseAnalysisEvent,
} from './types';

export function callObserveTool({
  rcaContext,
  toolCallId,
}: {
  rcaContext: RootCauseAnalysisContext;
  toolCallId: string;
}): Observable<ObservationToolMessage> {
  const { events } = rcaContext;

  const lastAssistantMessage = findLast(
    events.slice(0, -1),
    (event): event is Extract<RootCauseAnalysisEvent, AssistantMessage> =>
      event.role === MessageRole.Assistant
  );

  const toolMessagesByToolCallId = Object.fromEntries(
    compact(
      events.map((message) =>
        'toolCallId' in message &&
        (message.name === RCA_INVESTIGATE_ENTITY_TOOL_NAME || message.name === 'error')
          ? [message.toolCallId, message]
          : undefined
      )
    )
  );

  const investigationToolMessages =
    lastAssistantMessage && lastAssistantMessage.toolCalls
      ? compact(
          lastAssistantMessage.toolCalls.map((investigateEntityToolCall) => {
            if (investigateEntityToolCall.function.name !== RCA_INVESTIGATE_ENTITY_TOOL_NAME) {
              return undefined;
            }
            return {
              toolCall: investigateEntityToolCall,
              toolResponse: toolMessagesByToolCallId[investigateEntityToolCall.toolCallId],
            };
          })
        )
      : [];

  const investigations = investigationToolMessages
    .map((toolMessage) => toolMessage.toolResponse)
    .filter(
      (toolResponse): toolResponse is InvestigateEntityToolMessage =>
        toolResponse.name === RCA_INVESTIGATE_ENTITY_TOOL_NAME
    )
    .map((toolResponse) => ({ ...toolResponse.data, ...toolResponse.response }));

  return from(
    observeInvestigationResults({
      rcaContext,
      investigations,
    })
  ).pipe(
    switchMap((summary) => {
      const observationToolMessage: ObservationToolMessage = {
        name: RCA_OBSERVE_TOOL_NAME,
        response: {
          content: summary.content,
        },
        data: summary,
        role: MessageRole.Tool,
        toolCallId,
      };
      return of(observationToolMessage);
    })
  );
}
@@ -0,0 +1,15 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { AssistantMessage, MessageRole } from '@kbn/inference-common';
import { RootCauseAnalysisEvent } from './types';

export const EMPTY_ASSISTANT_MESSAGE: Extract<RootCauseAnalysisEvent, AssistantMessage> = {
  content: '',
  role: MessageRole.Assistant,
  toolCalls: [],
};
@@ -0,0 +1,20 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

export type {
  RootCauseAnalysisEvent,
  InvestigateEntityToolMessage,
  EndProcessToolMessage,
  ObservationToolMessage,
  RootCauseAnalysisToolMessage,
  ToolErrorMessage,
  RootCauseAnalysisToolRequest,
} from './types';
export type { SignificantEventsTimeline, SignificantEvent } from './tasks/generate_timeline';
export type { EntityInvestigation } from './tasks/investigate_entity';

export { runRootCauseAnalysis } from './run_root_cause_analysis';
@@ -0,0 +1,345 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

export const RCA_SYSTEM_PROMPT_BASE = `You are a helpful assistant for Elastic Observability.
You are a distinguished SRE with an established career, working in both
small shops and FAANG-level companies. You have worked with Elasticsearch
since the beginning and expertly use it in your analysis of incidents.

You use an evidence-based strategy to determine the root cause of
an incident. You thoroughly analyze Observability data. You use your
understanding of different architectures like microservices, monoliths,
event-driven systems, and environments like Kubernetes to discover
patterns and correlations in the data ingested into the user's system.

Your sizable experience with monitoring software systems has taught
you how to investigate issues and correlate symptoms of the investigated
service with its dependencies.

## Capabilities

You are highly skilled at inspecting logs, traces, alerts, and SLOs to uncover
the root cause of incidents, with a special emphasis on detecting log patterns
that reveal system behavior. You can identify related entities, such as upstream
services or the specific pod a service is running on, by searching through logs
and traces for relationships using metadata like IP addresses, session IDs, or
distributed tracing data. While you can analyze alerts and SLO-derived metrics,
you do not directly analyze other system metrics, inspect files, or execute
commands that modify the system.

## Non-capabilities

You lack the capabilities to analyze metrics or connect to external systems.`;

export const RCA_PROMPT_ENTITIES = `# Entities

In an Observability system, entities are distinct components or resources within
the infrastructure, each representing points of interest for monitoring and
troubleshooting. These entities form the backbone of log-based analysis and
allow teams to track behavior, detect anomalies, and investigate issues across
different layers of the system. Here’s a breakdown of common entities in
observability:

1. Services: Core units of functionality in an application ecosystem,
representing individual processes or applications (e.g., user-authentication,
payment processing). Services typically expose APIs or endpoints, and logs from
these entities often capture requests, responses, and error events, which are
critical for understanding application behavior.

2. Kubernetes (K8s) Entities:
  - Pods: The smallest deployable units in Kubernetes, usually containing one
or more containers. Logs from pods provide insight into container operations,
errors, and application states.
  - Namespaces: Logical groupings within a cluster for organizing and isolating
resources, helping in filtering logs by domain or responsibility.
  - Nodes: Worker machines (either physical or virtual) where pods run. Node
logs often cover hardware resource events, errors, and other system-level events
relevant to pod health and performance.
  - Deployments and ReplicaSets: Define and manage the desired state of pod
replication and rolling updates. Logs from these components can reveal changes
in application versions, scaling events, and configuration updates.

3. Virtual Machines (VMs): Virtualized computing resources that generate
operating system-level logs capturing events such as application crashes,
network issues, and OS-related errors.

4. Applications: Software systems or packages running across the infrastructure,
which may encompass multiple services. Logs from applications track user flows,
application states, and error messages, providing context for user interactions
and system events.

5. Serverless Functions (e.g., AWS Lambda): Code executions triggered by
specific events. Logs from serverless functions capture invocation details,
execution paths, and error traces, which are useful for understanding specific
function behaviors and pinpointing execution anomalies.

6. Databases and Data Stores: Includes SQL/NoSQL databases, caches, and storage
solutions. Logs from these entities cover query executions, connection issues,
and transaction errors, essential for tracking data layer issues.

7. Containers: Portable environments running individual services or processes.
Container logs capture application and system events within the containerized
environment, helping track process-level errors and status changes.

8. Load Balancers and API Gateways: Components responsible for managing and
routing traffic. Logs from these entities include request paths, status codes,
and errors encountered, which can indicate connectivity issues or
misconfigurations.

9. Networking Components: Entities like virtual private clouds (VPCs),
firewalls, VPNs, and network interfaces. Logs from these components track
traffic flows, connectivity issues, and security events, crucial for identifying
network-related anomalies.

10. Clusters and Regions: Groupings of infrastructure either physically or
logically, such as across data centers or cloud regions. Cluster and region logs
help capture high-level events and error messages, useful for understanding
system-wide issues and region-specific disruptions.

Each of these entities is typically identified by fields such as
\`service.name\`, \`kubernetes.pod.name\`, \`container.id\`, or similar fields
in log records. Observability systems use these identifiers to connect entities,
creating a map of relationships and dependencies that helps teams diagnose
issues, understand cross-entity impacts, and uncover root causes in distributed
architectures.`;

export const RCA_PROMPT_DEPENDENCIES = `## Understanding the Flow: Upstream vs. Downstream

- Upstream dependencies: These are the services that your service
depends on. They supply data, perform tasks, or provide resources that
your service consumes.
- Downstream dependencies: These are the services that depend on your
service. They consume the data or resources your service generates.

When diagnosing issues, distinguishing the direction of dependency can
clarify whether a problem originates from your service’s reliance on an
external input or whether your service is causing issues for other systems.

---

## When to Investigate Upstream Dependencies

Upstream issues typically occur when your service is failing due to problems
with the responses it receives from external systems.

1. Timeouts and Latency
  - Symptoms: Slow response times, retries, or timeouts.
  - Errors: HTTP 504, retrying connection, exceeded timeout threshold.
  - Focus: Check the performance and availability of upstream services
(e.g., APIs, databases) and network latency.

2. Data Integrity Issues
  - Symptoms: Inconsistent or corrupted data.
  - Errors: unexpected data format, deserialization errors.
  - Focus: Verify data received from upstream services, and investigate
schema or data format changes.

3. Connection Failures
  - Symptoms: Your service cannot connect to upstream services.
  - Errors: DNS lookup failed, connection refused, socket timeout.
  - Focus: Check upstream service health, DNS, and networking components.

4. Authentication/Authorization Failures
  - Symptoms: Failed access to upstream resources.
  - Errors: 401 Unauthorized, 403 Forbidden, token issues.
  - Focus: Validate credentials or tokens and investigate upstream access
policies.

---

## When to Investigate Downstream Dependencies

Downstream issues occur when your service is functioning but its outputs cause
failures in other services that depend on it.

1. Data or API Response Issues
  - Symptoms: Downstream services receive bad or invalid data.
  - Errors: data type mismatch, invalid JSON format.
  - Focus: Ensure your service is returning correct data and check for API
changes.

2. Rate-Limiting and Resource Exhaustion
  - Symptoms: Downstream services are overwhelmed.
  - Errors: 429 Too Many Requests, throttling or resource exhaustion.
  - Focus: Check your service’s request rates and resource usage (e.g., memory, CPU).

3. Unexpected Behavior or Regression
  - Symptoms: Downstream failures after a recent deployment.
  - Errors: New downstream errors after your service changes.
  - Focus: Review recent updates, API contracts, or integration points.

4. Eventual Consistency or Queue Backlogs
  - Symptoms: Delayed processing in downstream systems.
  - Errors: message queue full, backlog warnings.
  - Focus: Check event production rates and queue statuses in downstream services.`;

export const RCA_PROMPT_CHANGES = `## Reasoning about Correlating Changes in Incident Investigations

In a root cause analysis, understanding the types and timing of changes is key
to linking symptoms with underlying causes. Changes can broadly be classified
into **symptomatic changes** (indicators of system issues like elevated error
rates or degraded throughput) and **system changes** (events that modify system
configuration or structure, such as scale-downs, new version rollouts, or
significant configuration adjustments). By correlating these changes, we can
assess whether observed symptoms are likely related to specific system
modifications.

### Identifying Correlations Between Symptomatic and System Changes

When investigating a sudden issue—such as a 5x increase in latency—it’s
essential to evaluate both the **timing** and **nature** of associated changes
in upstream dependencies, resource utilization, and configuration events. For
instance:

- Consistent Symptomatic Behavior: If an upstream dependency exhibits a
similar, sustained latency spike around the same time and shows log entries
indicating CPU throttling, this would suggest a correlated, persistent issue
that may directly impact the observed symptom. A scale-down event preceding the
latency increase might indicate that reduced resources are stressing the
dependency.

- Transient vs. Persistent Issues: Another upstream dependency that
experiences a brief latency increase but recovers quickly is less likely
related. Short-lived changes that self-correct without intervention typically
have different root causes or may be unrelated noise.

### Types of Changes to Consider in Correlation

1. Log Pattern Changes: A shift in log patterns, especially around error
levels, provides significant insight. If there’s an increase in critical or
warning log patterns for a dependency during the latency spike, it could
indicate that the issue stems from this entity. Compare these log patterns to
past behavior to assess whether they represent an anomaly that might warrant
further investigation.

2. Event-Driven System Changes:
  - Scale Events: Scale-up or scale-down events can directly impact
performance. If a latency increase aligns with a scale-down, it may suggest that
resource reduction is straining the system.
  - Release or Deployment Events: A new version rollout or config change is
a frequent source of correlated issues. Compare the timing of the latency
increase to the deployment to see if the change directly impacts the system.
Correlate with alerts or SLO breaches on endpoints to understand the immediate
effects of the release.

3. SLO and Alert-Based Changes: SLO breaches and alerts can provide concrete
timestamps for when symptoms begin. For instance, a breach on error rates for a
specific service endpoint following a dependency’s scale-down event suggests a
possible causal link. An alert indicating sustained latency increase in a
dependency that remains unresolved points to a high-priority area for deeper
investigation.

4. Dependency Health and Behavior:
  - Related vs. Unrelated Dependencies: Similar to the latency example,
observe if multiple dependencies experience symptomatic changes simultaneously.
Related dependencies should show consistent, similar issues, while unrelated
dependencies may exhibit brief, unrelated spikes. Persistent issues across key
dependencies likely indicate a systemic cause, while isolated changes are less
likely to be relevant.

### Examples of Reasoning Through Changes

Consider these scenarios:
- Increase in Error Rates and a Recent Deployment: Suppose error rates for
an endpoint increase sharply post-deployment. If related logs show new error
patterns, this aligns the symptom with a deployment change. Investigate specific
changes in the deployment (e.g., code changes or resource allocation).
- Throughput Decrease and Scaling Events: If throughput dips shortly after a
scale-down event, it might suggest resource constraints. Analyze CPU or memory
throttling logs from this period in upstream dependencies to confirm.
- Cross-Service Latency Spikes: If multiple services along a call path
experience latency spikes, with CPU throttling logs, this suggests a resource
bottleneck. Trace logs and alerts related to autoscaling decisions may provide
insights into whether the system configuration caused cascading delays.

By carefully mapping these changes and analyzing their timing, you can
distinguish between causally related events and incidental changes, allowing for
a targeted and effective investigation.`;

export const RCA_PROMPT_CHANGE_POINTS = `## Change points

Change points can be classified as one of the following types:

- \`dip\`: a significant dip occurs at this change point
- \`distribution_change\`: the overall distribution of the values has changed
significantly
- \`non_stationary\`: there is no change point, but the values are not from a
stationary distribution
- \`spike\`: a significant spike occurs at this point
- \`stationary\`: no change point found
- \`step_change\`: the change indicates a statistically significant step up or
down in value distribution
- \`trend_change\`: there is an overall trend change occurring at this point

For \`spike\` and \`dip\`, this means a short-lived spike or dip that then
stabilizes again. For persistent changes, you'd see a \`step_change\` (if the
values before and after the change point are stable), or a \`trend_change\`
when the values show an upward or downward trend after the change.`;

export const RCA_PROMPT_SIGNIFICANT_EVENTS = `## Significant events

Generate a timeline of significant events. These events should capture
significant observed changes in the system that can be extracted from the
analyzed data. This timeline is absolutely critical to the investigation,
and close attention has to be paid to the data, and the instructions.

The timeline should focus on key events as captured in log patterns, including
both notable changes and unusual/critical messages. This data-driven timeline
should help establish a chain of causality, pinpointing when anomalies began,
what system behaviors were observed, and how these patterns relate to the overall incident.

- Use ISO timestamps to ensure precision and clarity.
- Include alerts that are part of the investigation. For these, use the start
time of the alert, and mention critical information about the alert, such as
reason and grouping fields.
- Focus on log entries that signal significant system behavior (e.g., errors,
retries, anomalies).
- Highlight critical log messages or changes in patterns that may correlate
with the issue.
- Include notable anomalies, such as spikes in error rates, unexpected system
responses, or any log entries suggesting failure or degradation.

Do not include:
- Events that are indicative of normal operations.
- Events that are unlikely to be related to the investigated issue.

Key Elements to Include:

- Log Patterns: Capture log messages that show unusual events or
abnormalities such as error codes, failed retries, or changes in log frequency.
- Timestamps: Ensure every entry in the timeline is time-stamped
with an accurate ISO 8601 timestamp.
- Event Description: Provide a clear, concise, and objective description of
what was observed in the logs.
- Corroborating Data: Link log anomalies to other relevant data points such
as traffic shifts, request patterns, or upstream/downstream service impacts.`;

export const RCA_PROMPT_TIMELINE_GUIDE = `
The timeline should focus on key events as
captured in log patterns, including both notable changes and unusual/critical
messages. This data-driven timeline should help establish a chain of causality,
pinpointing when anomalies began, what system behaviors were observed, and how
these patterns relate to the overall incident.

- Use ISO timestamps to ensure precision and clarity.
- Focus on log entries that signal significant system behavior (e.g.,
errors, retries, anomalies).
- Highlight critical log messages or changes in patterns that may correlate
with the issue.
- Include notable anomalies, such as spikes in error rates, unexpected
system responses, or any log entries suggesting failure or degradation.

Key Elements to Include:

Log Patterns: Capture log messages that show unusual events or
abnormalities such as error codes, failed retries, or changes in log frequency.
Timestamps: Ensure every entry in the timeline is time-stamped
with an accurate ISO 8601 timestamp.
Event Description: Provide a clear, concise description of what was
observed in the logs.
Corroborating Data: Link log anomalies to other relevant data points such
as traffic shifts, request patterns, or upstream/downstream service impacts.`;
@@ -0,0 +1,305 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { RulesClient } from '@kbn/alerting-plugin/server';
import { calculateAuto } from '@kbn/calculate-auto';
import { MessageRole, AssistantMessage, ToolMessage, ToolChoiceType } from '@kbn/inference-common';
import { InferenceClient } from '@kbn/inference-plugin/server';
import { Logger } from '@kbn/logging';
import { AlertsClient } from '@kbn/rule-registry-plugin/server';
import { findLast, pick } from 'lodash';
import moment from 'moment';
import { catchError, filter, from, map, mergeMap, Observable, of, switchMap } from 'rxjs';
import { ObservabilityAIAssistantClient } from '@kbn/observability-ai-assistant-plugin/server';
import { ObservabilityElasticsearchClient } from '@kbn/observability-utils-server/es/client/create_observability_es_client';
import {
  RCA_END_PROCESS_TOOL_NAME,
  RCA_INVESTIGATE_ENTITY_TOOL_NAME,
  RCA_OBSERVE_TOOL_NAME,
} from '@kbn/observability-ai-common/root_cause_analysis';
import { callEndRcaProcessTool } from './call_end_rca_process_tool';
import { callInvestigateEntityTool } from './call_investigate_entity_tool';
import { callObserveTool } from './call_observe_tool';
import { RCA_PROMPT_CHANGES, RCA_PROMPT_ENTITIES, RCA_SYSTEM_PROMPT_BASE } from './prompts';
import { RCA_TOOLS } from './tools';
import {
  EndProcessToolMessage,
  InvestigateEntityToolMessage,
  ObservationToolMessage,
  RootCauseAnalysisContext,
  RootCauseAnalysisEvent,
  ToolErrorMessage,
} from './types';
import { callTools } from './util/call_tools';
import { formatEntity } from './util/format_entity';
import { validateInvestigateEntityToolCalls } from './util/validate_investigate_entity_tool_call';

const SYSTEM_PROMPT_WITH_OBSERVE_INSTRUCTIONS = `${RCA_SYSTEM_PROMPT_BASE}

Your next step is to request an observation from another agent based
on the initial context or the results of previous investigations.`;

const SYSTEM_PROMPT_WITH_DECISION_INSTRUCTIONS = `${RCA_SYSTEM_PROMPT_BASE}

${RCA_PROMPT_ENTITIES}

${RCA_PROMPT_CHANGES}

To determine whether to end the process or continue analyzing another entity,
follow the advice from the previous observation, and these tips:

Continuing the process:
- Do not investigate an entity twice. This will result in a failure.
- Logs, traces, or observability data that suggest upstream or downstream
issues (such as connection failures, timeouts, or authentication errors)
indicate further investigation is required.

Ending the process:
- No further entities to investigate: If there are no unexplored upstream or
downstream dependencies, and all related entities have been investigated without
discovering new anomalies, it may be appropriate to end the process.
- If all investigated entities (e.g., services, hosts, containers) are
functioning normally, with no relevant issues found, and there are no signs of
dependencies being affected, you may consider ending the process.
- Avoid concluding the investigation based solely on symptoms or the absence
of immediate errors in the data. Unless a system change has been connected to
the incident, it is important to continue investigating dependencies to ensure
the root cause has been accurately identified.`;

export function runRootCauseAnalysis({
  serviceName,
  start: requestedStart,
  end: requestedEnd,
  esClient,
  alertsClient,
  rulesClient,
  observabilityAIAssistantClient,
  spaceId,
  indices,
  connectorId,
  inferenceClient,
  context: initialContext,
  logger: incomingLogger,
  prevEvents,
}: {
  context: string;
  serviceName: string;
  logger: Logger;
  inferenceClient: InferenceClient;
  start: number;
  end: number;
  alertsClient: AlertsClient;
  rulesClient: RulesClient;
  esClient: ObservabilityElasticsearchClient;
  observabilityAIAssistantClient: ObservabilityAIAssistantClient;
  indices: {
    logs: string[];
    traces: string[];
    sloSummaries: string[];
  };
  connectorId: string;
  spaceId: string;
  prevEvents?: RootCauseAnalysisEvent[];
}): Observable<RootCauseAnalysisEvent> {
  const logger = incomingLogger.get('rca');

  const entity = { 'service.name': serviceName };

  const bucketSize = calculateAuto
    .atLeast(30, moment.duration(requestedEnd - requestedStart))!
    .asMilliseconds();

  const start = Math.floor(requestedStart / bucketSize) * bucketSize;
  const end = Math.floor(requestedEnd / bucketSize) * bucketSize;

  const initialMessage = {
    role: MessageRole.User as const,
    content: `Investigate the health status of ${formatEntity(entity)}.

The context given for this investigation is:

${initialContext}`,
  };

  const nextEvents = [initialMessage, ...(prevEvents ?? [])];

  const initialRcaContext: RootCauseAnalysisContext = {
    connectorId,
    start,
    end,
    esClient,
    events: nextEvents,
    indices,
    inferenceClient,
    initialContext,
    alertsClient,
    observabilityAIAssistantClient,
    logger,
    rulesClient,
    spaceId,
    tokenLimit: 32_000,
  };

  const investigationTimeRangePrompt = `## Time range

The time range of the investigation is ${new Date(start).toISOString()} until ${new Date(
    end
  ).toISOString()}`;

  initialContext = `${initialContext}

${investigationTimeRangePrompt}
`;

  const next$ = callTools(
    {
      system: RCA_SYSTEM_PROMPT_BASE,
      connectorId,
      inferenceClient,
      messages: nextEvents,
      logger,
    },
    ({ messages }) => {
      const lastSuccessfulToolResponse = findLast(
        messages,
        (message) => message.role === MessageRole.Tool && message.name !== 'error'
      ) as Exclude<ToolMessage, ToolErrorMessage> | undefined;

      const shouldWriteObservationNext =
        !lastSuccessfulToolResponse || lastSuccessfulToolResponse.name !== RCA_OBSERVE_TOOL_NAME;

      const nextTools = shouldWriteObservationNext
        ? pick(RCA_TOOLS, RCA_OBSERVE_TOOL_NAME)
        : pick(RCA_TOOLS, RCA_END_PROCESS_TOOL_NAME, RCA_INVESTIGATE_ENTITY_TOOL_NAME);

      const nextSystem = shouldWriteObservationNext
        ? SYSTEM_PROMPT_WITH_OBSERVE_INSTRUCTIONS
        : SYSTEM_PROMPT_WITH_DECISION_INSTRUCTIONS;

      return {
        messages,
        system: `${nextSystem}

${investigationTimeRangePrompt}`,
        tools: nextTools,
        toolChoice: shouldWriteObservationNext
          ? { function: RCA_OBSERVE_TOOL_NAME }
          : ToolChoiceType.required,
      };
    },
    ({
      toolCalls,
      messages,
    }): Observable<
      | ObservationToolMessage
      | ToolErrorMessage
      | InvestigateEntityToolMessage
      | EndProcessToolMessage
      | AssistantMessage
    > => {
      const nextRcaContext = {
        ...initialRcaContext,
        events: messages as RootCauseAnalysisEvent[],
      };

      return of(undefined).pipe(
        switchMap(() => {
          return from(
            validateInvestigateEntityToolCalls({ rcaContext: nextRcaContext, toolCalls })
          );
        }),
        switchMap((errors) => {
          if (errors.length) {
            return of(
              ...toolCalls.map((toolCall) => {
                const toolCallErrorMessage: ToolErrorMessage = {
                  role: MessageRole.Tool,
                  name: 'error',
                  response: {
                    error: {
                      message: `Some ${RCA_INVESTIGATE_ENTITY_TOOL_NAME} calls were not valid:
${errors.map((error) => `- ${error}`).join('\n')}`,
                    },
                  },
                  toolCallId: toolCall.toolCallId,
                };
                return toolCallErrorMessage;
              })
            );
          }
          return of(...toolCalls).pipe(
            mergeMap((toolCall) => {
              function executeToolCall(): Observable<
                | EndProcessToolMessage
                | InvestigateEntityToolMessage
                | ObservationToolMessage
                | ToolErrorMessage
                | AssistantMessage
              > {
|
||||
switch (toolCall.function.name) {
|
||||
case RCA_END_PROCESS_TOOL_NAME:
|
||||
return callEndRcaProcessTool({
|
||||
rcaContext: nextRcaContext,
|
||||
toolCallId: toolCall.toolCallId,
|
||||
});
|
||||
|
||||
case RCA_INVESTIGATE_ENTITY_TOOL_NAME:
|
||||
return callInvestigateEntityTool({
|
||||
context: toolCall.function.arguments.context,
|
||||
field: toolCall.function.arguments.entity.field,
|
||||
value: toolCall.function.arguments.entity.value,
|
||||
rcaContext: nextRcaContext,
|
||||
toolCallId: toolCall.toolCallId,
|
||||
});
|
||||
|
||||
case RCA_OBSERVE_TOOL_NAME:
|
||||
return callObserveTool({
|
||||
rcaContext: nextRcaContext,
|
||||
toolCallId: toolCall.toolCallId,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return executeToolCall().pipe(
|
||||
catchError((error) => {
|
||||
logger.error(`Failed executing task: ${error.message}`);
|
||||
logger.error(error);
|
||||
const toolErrorMessage: ToolErrorMessage = {
|
||||
name: 'error',
|
||||
role: MessageRole.Tool,
|
||||
response: {
|
||||
error: {
|
||||
...('toJSON' in error && typeof error.toJSON === 'function'
|
||||
? error.toJSON()
|
||||
: {}),
|
||||
message: error.message,
|
||||
},
|
||||
},
|
||||
toolCallId: toolCall.toolCallId,
|
||||
};
|
||||
return of(toolErrorMessage);
|
||||
})
|
||||
);
|
||||
}, 3)
|
||||
);
|
||||
})
|
||||
);
|
||||
}
|
||||
);
|
||||
|
||||
return next$.pipe(
|
||||
filter((event) =>
|
||||
Boolean(event.role !== MessageRole.Assistant || event.content || event.toolCalls?.length)
|
||||
),
|
||||
map((event) => {
|
||||
if (event.role === MessageRole.Assistant) {
|
||||
return event as Extract<RootCauseAnalysisEvent, AssistantMessage>;
|
||||
}
|
||||
return event;
|
||||
})
|
||||
);
|
||||
}
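For orientation, here is a minimal consumption sketch of the function above. Everything behind `declare const` is assumed to be provided by the calling plugin (for example a server route handler); the service name, time range, and context string are placeholders, not part of this change:

```ts
import { runRootCauseAnalysis } from './run_root_cause_analysis'; // hypothetical path

// Assumed to come from the surrounding route/plugin context.
declare const deps: Omit<
  Parameters<typeof runRootCauseAnalysis>[0],
  'serviceName' | 'start' | 'end' | 'context'
>;

const event$ = runRootCauseAnalysis({
  ...deps,
  serviceName: 'checkout-service', // placeholder
  start: Date.now() - 60 * 60 * 1000,
  end: Date.now(),
  context: 'Elevated failure rate on POST /api/checkout since 10:00 UTC', // placeholder
});

// Each emitted event is an assistant message or a tool response; empty
// assistant chunks are already filtered out by the function itself.
event$.subscribe({
  next: (event) => deps.logger.debug(`RCA event from role ${event.role}`),
  error: (error) => deps.logger.error(error),
});
```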
@@ -0,0 +1,402 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */
import { getEntityKuery } from '@kbn/observability-utils-common/entities/get_entity_kuery';
import { formatValueForKql } from '@kbn/observability-utils-common/es/format_value_for_kql';
import type { TruncatedDocumentAnalysis } from '@kbn/observability-utils-common/llm/log_analysis/document_analysis';
import { ShortIdTable } from '@kbn/observability-utils-common/llm/short_id_table';
import {
  P_VALUE_SIGNIFICANCE_HIGH,
  P_VALUE_SIGNIFICANCE_MEDIUM,
} from '@kbn/observability-utils-common/ml/p_value_to_label';
import {
  FieldPatternResultWithChanges,
  getLogPatterns,
} from '@kbn/observability-utils-server/entities/get_log_patterns';
import { castArray, compact, groupBy, orderBy } from 'lodash';
import { RCA_PROMPT_CHANGES, RCA_PROMPT_ENTITIES } from '../../prompts';
import { RootCauseAnalysisContext } from '../../types';
import { formatEntity } from '../../util/format_entity';
import { serializeKnowledgeBaseEntries } from '../../util/serialize_knowledge_base_entries';
import { ScoredKnowledgeBaseEntry } from '../get_knowledge_base_entries';

type LogPatternRelevance = 'normal' | 'unusual' | 'warning' | 'critical';

export type AnalyzedLogPattern = FieldPatternResultWithChanges & {
  relevance: LogPatternRelevance;
  interesting: boolean;
};

export interface AnalyzeLogPatternOutput {
  ownPatterns: AnalyzedLogPattern[];
  patternsFromOtherEntities: AnalyzedLogPattern[];
}

const normalDescription = `normal operations, such as access logs`;
const unusualDescription = `something unusual and/or
appear rarely, such as startup or shutdown messages or
other rare events`;
const warningDescription = `something being in an unexpected state,
such as error messages, rate limiting or disk usage warnings`;
const criticalDescription = `something being in a critical state,
such as startup failure messages, out-of-memory errors or crashloopbackoff
events`;

interface LogPatternCutOff {
  significance?: 'high' | 'medium' | 'low';
  pValue?: number;
}

export async function analyzeLogPatterns({
  entity,
  allAnalysis,
  system,
  rcaContext: { logger: parentLogger, inferenceClient, connectorId, esClient, start, end, indices },
  cutoff,
  kbEntries,
}: {
  entity: Record<string, string>;
  allAnalysis: Array<{ index: string | string[]; analysis: TruncatedDocumentAnalysis }>;
  system: string;
  cutoff?: LogPatternCutOff;
  kbEntries: ScoredKnowledgeBaseEntry[];
  rcaContext: Pick<
    RootCauseAnalysisContext,
    'indices' | 'logger' | 'inferenceClient' | 'connectorId' | 'esClient' | 'start' | 'end'
  >;
}): Promise<AnalyzeLogPatternOutput> {
  const kuery = getEntityKuery(entity);

  const logger = parentLogger.get('analyzeLogPatterns');

  const fields = ['message', 'error.exception.message'];

  logger.debug(() => `Analyzing log patterns for ${JSON.stringify(entity)}`);

  const systemPrompt = `You are a helpful assistant for Elastic Observability.
You are an expert in analyzing log messages for software
systems, and you use your extensive experience as an SRE
to thoroughly analyze log patterns for things that require
attention from the user.

${RCA_PROMPT_CHANGES}

${RCA_PROMPT_ENTITIES}

## Entity

The following entity is being analyzed:

${formatEntity(entity)}

${serializeKnowledgeBaseEntries(kbEntries)}

### Entity analysis

${allAnalysis.map(({ index: analyzedIndex, analysis }) => {
    return `#### Indices: ${castArray(analyzedIndex).join(',')}

${JSON.stringify(analysis)}`;
  })}

${system}`;

  const kueryForOtherEntities = `NOT (${kuery}) AND ${Object.values(entity)
    .map(
      (val) =>
        `(${fields.map((field) => `(${[field, formatValueForKql(val)].join(':')})`).join(' OR ')})`
    )
    .join(' AND ')}`;

  const [logPatternsFromEntity, logPatternsFromElsewhere] = await Promise.all([
    getLogPatterns({
      esClient,
      index: [...indices.logs, ...indices.traces],
      start,
      end,
      kuery,
      includeChanges: true,
      fields,
      metadata: [],
    }),
    getLogPatterns({
      esClient,
      index: [...indices.logs],
      start,
      end,
      kuery: kueryForOtherEntities,
      metadata: Object.keys(entity),
      includeChanges: true,
      fields,
    }),
  ]);

  const patternIdLookupTable = new ShortIdTable();

  logger.debug(
    () =>
      `Found ${logPatternsFromEntity.length} own log patterns and ${logPatternsFromElsewhere.length} from others`
  );

  logger.trace(
    () =>
      `Found log patterns${JSON.stringify({
        entity,
        logPatternsFromEntity,
        logPatternsFromElsewhere,
      })}`
  );

  const patternsWithIds = [...logPatternsFromEntity, ...logPatternsFromElsewhere].map((pattern) => {
    return {
      ...pattern,
      shortId: patternIdLookupTable.take(pattern.regex),
    };
  });

  const patternsByRegex = new Map(patternsWithIds.map((pattern) => [pattern.regex, pattern]));

  const serializedOwnEntity = formatEntity(entity);

  const [ownPatterns, patternsFromOtherEntities] = await Promise.all([
    logPatternsFromEntity.length ? categorizeOwnPatterns() : [],
    logPatternsFromElsewhere.length ? selectRelevantPatternsFromOtherEntities() : [],
  ]);

  logger.trace(
    () =>
      `Classified log patterns ${JSON.stringify([entity, ownPatterns, patternsFromOtherEntities])}`
  );

  const allPatterns = [...ownPatterns, ...patternsFromOtherEntities];

  const sortedByPValueAsc = orderBy(
    allPatterns.filter((pattern) => pattern.change && pattern.change.p_value),
    (pattern) => {
      return pattern.change.p_value;
    },
    'asc'
  );

  const pValueCutOff = getPValueCutoff({ cutoff, max: sortedByPValueAsc[0]?.change.p_value });

  return {
    ownPatterns: ownPatterns.map((pattern) => ({
      ...pattern,
      interesting: isInterestingPattern(pattern, pValueCutOff),
    })),
    patternsFromOtherEntities: patternsFromOtherEntities.map((pattern) => ({
      ...pattern,
      interesting: isInterestingPattern(pattern, pValueCutOff),
    })),
  };

  function categorizeOwnPatterns() {
    return inferenceClient
      .output({
        id: 'analyze_log_patterns',
        connectorId,
        system: systemPrompt,
        input: `Based on the following log patterns from
${formatEntity(entity)}, group these patterns into
the following categories:

- normal (patterns that are indicative of ${normalDescription})
- unusual (patterns that are indicative of ${unusualDescription})
- warning (patterns that are indicative of ${warningDescription})
- critical (patterns that are indicative of ${criticalDescription})

## Log patterns:

${preparePatternsForLlm(logPatternsFromEntity)}
`,
        schema: {
          type: 'object',
          properties: {
            categories: {
              type: 'array',
              items: {
                type: 'object',
                properties: {
                  relevance: {
                    type: 'string',
                    enum: ['normal', 'unusual', 'warning', 'critical'],
                  },
                  shortIds: {
                    type: 'array',
                    description:
                      'The pattern IDs you want to group here. Use the pattern short ID.',
                    items: {
                      type: 'string',
                    },
                  },
                },
                required: ['relevance', 'shortIds'],
              },
            },
          },
          required: ['categories'],
        } as const,
      })
      .then((outputEvent) => {
        return outputEvent.output.categories.flatMap((category) => {
          return mapIdsBackToPatterns(category.shortIds).map((pattern) => {
            return {
              ...pattern,
              relevance: category.relevance,
            };
          });
        });
      });
  }

  function selectRelevantPatternsFromOtherEntities() {
    return inferenceClient
      .output({
        id: 'select_relevant_patterns_from_other_entities',
        connectorId,
        system: systemPrompt,
        input: `Based on the following log patterns that
are NOT from ${serializedOwnEntity}, group these
patterns into the following categories:

- irrelevant (patterns that are not relevant for
${serializedOwnEntity})
- normal (patterns that are relevant for
${serializedOwnEntity} and are indicative of ${normalDescription})
- unusual (patterns that are relevant for
${serializedOwnEntity} and are indicative of ${unusualDescription})
- warning (patterns that are relevant for
${serializedOwnEntity} and are indicative of ${warningDescription})
- critical (patterns that are relevant for
${serializedOwnEntity} and are indicative of ${criticalDescription})

Relevant patterns are messages that mention the
investigated entity, or things that are indicative
of critical failures or changes in the entity
that owns the log pattern.

## Log patterns:

${preparePatternsForLlm(logPatternsFromElsewhere)}
`,
        schema: {
          type: 'object',
          properties: {
            categories: {
              type: 'array',
              items: {
                type: 'object',
                properties: {
                  relevance: {
                    type: 'string',
                    enum: ['irrelevant', 'normal', 'unusual', 'warning', 'critical'],
                  },
                  shortIds: {
                    type: 'array',
                    description:
                      'The pattern IDs you want to group here. Use the pattern short ID.',
                    items: {
                      type: 'string',
                    },
                  },
                },
                required: ['relevance', 'shortIds'],
              },
            },
          },
          required: ['categories'],
        } as const,
      })
      .then((outputEvent) => {
        return outputEvent.output.categories.flatMap((category) => {
          return mapIdsBackToPatterns(category.shortIds).flatMap((pattern) => {
            if (category.relevance === 'irrelevant') {
              return [];
            }
            return [
              {
                ...pattern,
                relevance: category.relevance,
              },
            ];
          });
        });
      });
  }

  function preparePatternsForLlm(patterns: FieldPatternResultWithChanges[]): string {
    const groupedByField = groupBy(patterns, (pattern) => pattern.field);

    return Object.entries(groupedByField)
      .map(([field, patternsForField]) => {
        return `### \`${field}\`

#### Patterns

${JSON.stringify(
          patternsForField.map((pattern) => {
            return {
              shortId: patternIdLookupTable.take(pattern.regex),
              regex: pattern.regex,
              sample: pattern.sample,
              highlight: pattern.highlight,
              change: pattern.change,
            };
          })
        )}
`;
      })
      .join('\n\n');
  }

  function mapIdsBackToPatterns(ids?: string[]) {
    return compact(
      ids?.map((shortId) => {
        const lookupId = patternIdLookupTable.lookup(shortId);
        if (!lookupId) {
          return undefined;
        }
        const pattern = patternsByRegex.get(lookupId);
        return pattern;
      })
    );
  }
}

function isInterestingPattern(
  pattern: Omit<AnalyzedLogPattern, 'interesting'>,
  pValueCutOff: number
) {
  return (pattern.change.p_value ?? 1) <= pValueCutOff || pattern.relevance !== 'normal';
}

function getPValueCutoff({ max, cutoff }: { max?: number; cutoff?: LogPatternCutOff }) {
  if (cutoff?.pValue) {
    return cutoff?.pValue;
  }

  if (cutoff?.significance === 'high') {
    return P_VALUE_SIGNIFICANCE_HIGH;
  }

  if (cutoff?.significance === 'medium') {
    return P_VALUE_SIGNIFICANCE_MEDIUM;
  }

  if (max === undefined) {
    return Number.MAX_VALUE;
  }

  if (max <= P_VALUE_SIGNIFICANCE_HIGH) {
    return P_VALUE_SIGNIFICANCE_HIGH;
  }

  if (max <= P_VALUE_SIGNIFICANCE_MEDIUM) {
    return P_VALUE_SIGNIFICANCE_MEDIUM;
  }

  return Number.MAX_VALUE;
}
@@ -0,0 +1,74 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { InferenceClient } from '@kbn/inference-plugin/server';
import { TruncatedDocumentAnalysis } from '@kbn/observability-utils-common/llm/log_analysis/document_analysis';
import { FieldPatternResultWithChanges } from '@kbn/observability-utils-server/entities/get_log_patterns';
import { RCA_SYSTEM_PROMPT_BASE } from '../../prompts';
import { formatEntity } from '../../util/format_entity';
import { serializeKnowledgeBaseEntries } from '../../util/serialize_knowledge_base_entries';
import { ScoredKnowledgeBaseEntry } from '../get_knowledge_base_entries';
import { getInvestigateEntityTaskPrompt } from '../investigate_entity/prompts';

export async function describeEntity({
  inferenceClient,
  connectorId,
  entity,
  contextForEntityInvestigation,
  analysis,
  ownPatterns,
  kbEntries,
}: {
  inferenceClient: InferenceClient;
  connectorId: string;
  entity: Record<string, string>;
  analysis: TruncatedDocumentAnalysis;
  contextForEntityInvestigation: string;
  ownPatterns: FieldPatternResultWithChanges[];
  kbEntries: ScoredKnowledgeBaseEntry[];
}) {
  const system = RCA_SYSTEM_PROMPT_BASE;

  const input = `${getInvestigateEntityTaskPrompt({ entity, contextForEntityInvestigation })}

## Context for investigating ${formatEntity(entity)}

${contextForEntityInvestigation}

${serializeKnowledgeBaseEntries(kbEntries)}

## Data samples

${JSON.stringify(analysis)}

## Log patterns

${JSON.stringify(ownPatterns.map(({ regex, sample }) => ({ regex, sample })))}

## Current task

Describe the entity characteristics based on the sample documents and log
patterns. Put it in context of the investigation process. Mention the reason
why it's being investigated, and how it is related to other entities that were
previously investigated. Mention these three things:

- infrastructure & environment
- communication characteristics (protocols and endpoints)
- context of entity in investigation

You shouldn't mention the log patterns, they will be analyzed elsewhere.
`;

  const response = await inferenceClient.output({
    id: 'describe_entity',
    connectorId,
    system,
    input,
  });

  return response.content;
}
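A brief usage sketch for the helper above; the connector ID is invented, and the other inputs are assumed to be produced by the earlier analysis steps:

```ts
declare const args: Omit<Parameters<typeof describeEntity>[0], 'connectorId'>;

// Returns plain prose from the LLM; there is no schema on this output call.
const entityDescription = await describeEntity({
  ...args,
  connectorId: 'my-gpt4o-connector', // placeholder
});
```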
@@ -0,0 +1,189 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { InferenceClient } from '@kbn/inference-plugin/server';
import { TruncatedDocumentAnalysis } from '@kbn/observability-utils-common/llm/log_analysis/document_analysis';
import { omit, partition, sumBy } from 'lodash';
import { RCA_SYSTEM_PROMPT_BASE } from '../../prompts';
import { formatEntity } from '../../util/format_entity';
import { serializeKnowledgeBaseEntries } from '../../util/serialize_knowledge_base_entries';
import { AnalyzedLogPattern } from '../analyze_log_patterns';
import { ScoredKnowledgeBaseEntry } from '../get_knowledge_base_entries';
import { getInvestigateEntityTaskPrompt } from '../investigate_entity/prompts';

export interface LogPatternDescription {
  content: string;
  docCount: number;
  interestingPatternCount: number;
  ignoredPatternCount: number;
  ignoredDocCount: number;
}

export async function describeLogPatterns({
  inferenceClient,
  connectorId,
  entity,
  contextForEntityInvestigation,
  analysis,
  ownPatterns: allOwnPatterns,
  patternsFromOtherEntities,
  kbEntries,
}: {
  inferenceClient: InferenceClient;
  connectorId: string;
  entity: Record<string, string>;
  analysis: TruncatedDocumentAnalysis;
  contextForEntityInvestigation: string;
  ownPatterns: AnalyzedLogPattern[];
  patternsFromOtherEntities: AnalyzedLogPattern[];
  kbEntries: ScoredKnowledgeBaseEntry[];
}): Promise<LogPatternDescription> {
  const system = RCA_SYSTEM_PROMPT_BASE;

  const [ownInterestingPatterns, ignoredOwnPatterns] = partition(
    allOwnPatterns,
    (pattern) => pattern.interesting
  );

  const stats = {
    docCount: sumBy(allOwnPatterns, (pattern) => pattern.count),
    interestingPatternCount: ownInterestingPatterns.length,
    otherInterestingPatternCount: patternsFromOtherEntities.length,
    ignoredPatternCount: ignoredOwnPatterns.length,
    ignoredDocCount: sumBy(ignoredOwnPatterns, (pattern) => pattern.count),
  };

  const header = `## Log analysis

### Stats for own log patterns:
- ${stats.docCount} documents analyzed
- ${stats.interestingPatternCount} interesting patterns
- ${stats.ignoredPatternCount} ignored patterns, accounting for
${stats.ignoredDocCount} out of ${stats.docCount} documents
- ${stats.otherInterestingPatternCount} relevant patterns from
other entities`;

  if (!stats.interestingPatternCount && !stats.otherInterestingPatternCount) {
    return {
      ...stats,
      content: `${header}\n\nNo interesting log patterns`,
    };
  }

  const ownLogPatternsPrompt = ownInterestingPatterns.length
    ? JSON.stringify(
        ownInterestingPatterns.map(({ regex, sample, change, count, timeseries }) => ({
          regex,
          sample,
          change,
          count,
          timeseries: timeseries.map(({ x, y }, index) => {
            if (index === change.change_point) {
              return `${change.type} at ${new Date(x).toISOString()}: ${y}`;
            }
            return `${new Date(x).toISOString()}: ${y}`;
          }),
        }))
      )
    : 'No own log patterns found';

  const otherLogPatternsPrompt = patternsFromOtherEntities.length
    ? JSON.stringify(
        patternsFromOtherEntities.map(
          ({ regex, sample, change, count, timeseries, metadata, field, highlight }) => ({
            regex,
            sample,
            change,
            count,
            timeseries: timeseries.map(({ x, y }, index) => {
              if (index === change.change_point) {
                return `${change.type} at ${new Date(x).toISOString()}: ${y}`;
              }
              return `${new Date(x).toISOString()}: ${y}`;
            }),
            entity: omit(metadata, field),
            highlight,
          })
        )
      )
    : 'No relevant log patterns found from other entities';

  const input = `${getInvestigateEntityTaskPrompt({ entity, contextForEntityInvestigation })}

## Context for investigating ${formatEntity(entity)}

${contextForEntityInvestigation}

${serializeKnowledgeBaseEntries(kbEntries)}

## Data samples

${JSON.stringify(analysis)}

## Log patterns from ${formatEntity(entity)}

${ownLogPatternsPrompt}

## Possibly relevant log patterns from other entities

${otherLogPatternsPrompt}

### Interpreting log patterns and samples

The pattern itself is what is consistent across all messages. The values from these parts
are separately given in "constants". There's also a single (random) _sample_ included, with
the variable part being given as well. E.g., if the failure in the sample is not part of the pattern
itself, you should mention that in your analysis.

## Task

Using only the log patterns, describe your observations about the entity.

Group these patterns together based on topic. Some examples of these topics:

- normal operations such as request logs
- connection issues to an upstream dependency
- startup messages
- garbage collection messages

For patterns with change points, describe the trend before and after the change point based
on the data points. E.g.:
- A persisted drop to near-zero after 2020-01-01T05:00:00.000Z
- A spike from 10 to 100 at 2020-01-01T05:00:00.000Z, which went back down
to the average after 2020-01-01T05:02:00.000Z
- A trend change after 2020-01-01T05:00:00.000Z. The values ranged from 10
to 20 before, but afterwards increased from 20 to 100 until
2020-01-01T05:02:00.000Z.

Do not:
- repeat the variables, instead, repeat the constants.
- repeat the timeseries as a whole, verbatim, in full. However, you can use individual data points + timestamps to illustrate the magnitude of the change, as in the example previously given.
- make up timestamps.
- separately list individual events if you have already mentioned
the pattern.

Statistics:

- ${stats.interestingPatternCount} patterns from ${formatEntity(entity)}
were collected
- ${stats.docCount} logs were categorized
- ${stats.ignoredPatternCount} patterns were deemed uninteresting and accounted
for ${stats.ignoredDocCount} out of the total amount of logs
`;

  const response = await inferenceClient.output({
    id: 'describe_log_patterns',
    connectorId,
    system,
    input,
  });

  return {
    ...stats,
    content: response.content,
  };
}
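The timeseries serialization above inlines the change point into the series so the model sees it in context. Roughly, for a step change detected at index 2 (times, values, and change type invented for illustration):

```ts
// Hypothetical input to the mapping above.
const change = { type: 'step_change', change_point: 2 };
const timeseries = [
  { x: 1735689600000, y: 12 },
  { x: 1735689660000, y: 14 },
  { x: 1735689720000, y: 95 },
];

const rendered = timeseries.map(({ x, y }, index) =>
  index === change.change_point
    ? `${change.type} at ${new Date(x).toISOString()}: ${y}`
    : `${new Date(x).toISOString()}: ${y}`
);
// [
//   '2025-01-01T00:00:00.000Z: 12',
//   '2025-01-01T00:01:00.000Z: 14',
//   'step_change at 2025-01-01T00:02:00.000Z: 95',
// ]
```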
@@ -0,0 +1,438 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { InferenceClient } from '@kbn/inference-plugin/server';
import { Logger } from '@kbn/logging';
import { getEntityKuery } from '@kbn/observability-utils-common/entities/get_entity_kuery';
import {
  DocumentAnalysis,
  TruncatedDocumentAnalysis,
} from '@kbn/observability-utils-common/llm/log_analysis/document_analysis';
import { sortAndTruncateAnalyzedFields } from '@kbn/observability-utils-common/llm/log_analysis/sort_and_truncate_analyzed_fields';
import { analyzeDocuments } from '@kbn/observability-utils-server/entities/analyze_documents';
import { FieldPatternResultWithChanges } from '@kbn/observability-utils-server/entities/get_log_patterns';
import { ObservabilityElasticsearchClient } from '@kbn/observability-utils-server/es/client/create_observability_es_client';
import { kqlQuery } from '@kbn/observability-utils-server/es/queries/kql_query';
import { rangeQuery } from '@kbn/observability-utils-server/es/queries/range_query';
import { chunk, isEmpty, isEqual } from 'lodash';
import pLimit from 'p-limit';
import {
  RCA_PROMPT_DEPENDENCIES,
  RCA_PROMPT_ENTITIES,
  RCA_SYSTEM_PROMPT_BASE,
} from '../../prompts';
import { chunkOutputCalls } from '../../util/chunk_output_calls';
import { formatEntity } from '../../util/format_entity';
import { serializeKnowledgeBaseEntries } from '../../util/serialize_knowledge_base_entries';
import { toBlockquote } from '../../util/to_blockquote';
import { ScoredKnowledgeBaseEntry } from '../get_knowledge_base_entries';
import { RelatedEntityKeywordSearch } from './write_keyword_searches_for_related_entities';

export interface RelatedEntityFromSearchResults {
  entity: { [x: string]: string };
  highlight: Record<string, string[]>;
  analysis: TruncatedDocumentAnalysis;
}

function getPromptForFoundEntity({ entity, analysis, highlight }: RelatedEntityFromSearchResults) {
  return `## Entity: ${formatEntity(entity)}

${toBlockquote(`### Search highlights for ${formatEntity(entity)}
${JSON.stringify(highlight)}`)}
`;
}

function getInputPromptBase({
  entity,
  analysis,
  ownPatterns,
  patternsFromOtherEntities,
  searches,
  context,
  kbEntries,
}: {
  entity: Record<string, string>;
  analysis: TruncatedDocumentAnalysis;
  ownPatterns: FieldPatternResultWithChanges[];
  patternsFromOtherEntities: FieldPatternResultWithChanges[];
  searches: RelatedEntityKeywordSearch[];
  context: string;
  kbEntries: ScoredKnowledgeBaseEntry[];
}) {
  const otherPatternsPrompt = patternsFromOtherEntities.length
    ? JSON.stringify(
        patternsFromOtherEntities.map((pattern) => ({
          sample: pattern.sample,
          regex: pattern.regex,
        }))
      )
    : 'No relevant log patterns from other entities found';

  const logPatternsPrompt = ownPatterns.length
    ? JSON.stringify(
        ownPatterns.map((pattern) => {
          return { sample: pattern.sample, regex: pattern.regex };
        })
      )
    : 'No log patterns found';

  return `Describe possible relationships to the investigated entity ${formatEntity(entity)}.

## Context

${toBlockquote(context)}

${serializeKnowledgeBaseEntries(kbEntries)}

## Data analysis
${JSON.stringify(analysis)}

## Log patterns for ${formatEntity(entity)}

${logPatternsPrompt}

## Patterns from other entities

${otherPatternsPrompt}

## Search keywords

${searches
    .map(({ fragments, appearsAs }) => {
      return `## Appears as: ${appearsAs}

### Fragments:
${fragments.map((fragment) => `- \`${fragment}\``).join('\n')}`;
    })
    .join('\n')}`;
}

function getInputPromptInstructions({ entity }: { entity: Record<string, any> }) {
  return `### Indicator strength

In an Observability system, indicators of relationships between entities like
services, hosts, users, or requests can vary in strength. Some indicators
clearly define relationships, while others only suggest correlations. Here’s a
breakdown of these indicators into strong, average, and weak categories, with an
additional look at how weak indicators can become strong when combined.

Strong indicators provide definitive links between entities. Distributed tracing
IDs (trace, span, and parent) are among the strongest indicators, as they map
the complete request path across services, showing exact service interactions.
Session or user IDs are also strong indicators, capturing a user’s actions
across services or hosts and revealing issues specific to particular users.

Average indicators give helpful context but may require supporting data to
clarify relationships. IP addresses, for instance, are moderately strong for
tracking inter-service calls within controlled environments but are weaker
across public or shared networks where IP reuse is common. URL paths also fall
in this category; they link entities to specific endpoints or service functions
and are moderately strong for tracking interactions between microservices with
known APIs. Port numbers are another average indicator. While they suggest the
service interaction type (HTTP, database), they generally need pairing with IP
addresses or URLs for more accuracy, as port numbers alone are often shared
across different services.

Weak indicators are often too generic to imply a direct relationship but can
suggest possible correlations. Host names, for example, are broad and typically
cover a range of services or applications, especially in large clusters.
Time-based indicators, such as timestamps or TTL values, suggest possible timing
correlations but don’t establish a definitive link on their own. Status codes,
like HTTP 500 errors, indicate issues but don’t specify causality, often
requiring corroboration with stronger indicators like trace or session IDs.

However, weak indicators can become strong when they appear together. For
instance, a combination of IP address, port, and timestamp can strongly suggest
a direct interaction between services, especially when the same combination is
seen repeatedly or in conjunction with related URLs. Similarly, a host name
combined with a unique URL path can strongly suggest that a specific service or
pod is generating particular request patterns, even if each alone is too
general.

## Relevance to the investigation

Given the context of the investigation, some entities might be very relevant
even if there is no strong evidence of them being a direct dependency of
${formatEntity(entity)}. For instance, the related entity might be an
orchestrating entity, or it might be involved in a specific operation related
to the ongoing issue.

## Identifying entity relationships

Your current task is to identify possible entity relationships for the
investigated entity ${formatEntity(entity)}. You will get some context, document
analysis for the investigated entity, and results from keyword searches that were
extracted from the entity. Based on this data, list entities that could possibly
be related to the given entity and/or the initial context. List the highly
relevant entities first.

## Output

For each possible relationship, describe the following things:
- The related entity (as a key-value pair)
- The indicators you have observed as evidence of the relationship. Include the
strength of the indicator, and the exact pieces of data that are related to it
(field names and values, in both the investigated entity, and the possibly
related entity).
- Reason how the related entity is related to both ${formatEntity(entity)} as a
dependency and the context. For instance, describe who is the caller and callee
or whether that is unclear, based on the data, or explain how it might be
related to the context.
- The overall likelihood of it being a relevant entity.`;
}

export async function analyzeFetchedRelatedEntities({
  connectorId,
  inferenceClient,
  esClient,
  start,
  end,
  searches,
  groupingFields,
  index,
  entity,
  ownPatterns,
  analysis,
  patternsFromOtherEntities,
  logger: parentLogger,
  context,
  kbEntries,
}: {
  connectorId: string;
  inferenceClient: InferenceClient;
  esClient: ObservabilityElasticsearchClient;
  start: number;
  end: number;
  searches: RelatedEntityKeywordSearch[];
  groupingFields: string[];
  index: string | string[];
  entity: Record<string, string>;
  analysis: {
    truncated: TruncatedDocumentAnalysis;
    full: DocumentAnalysis;
  };
  ownPatterns: FieldPatternResultWithChanges[];
  patternsFromOtherEntities: FieldPatternResultWithChanges[];
  context: string;
  logger: Logger;
  kbEntries: ScoredKnowledgeBaseEntry[];
}): Promise<{
  summaries: string[];
  foundEntities: RelatedEntityFromSearchResults[];
}> {
  const entityFields = Object.keys(entity);

  const logger = parentLogger.get('findRelatedEntities');

  logger.debug(
    () => `Finding related entities: ${JSON.stringify({ entity, groupingFields, searches })}`
  );

  const allValuesFromEntity = Array.from(
    new Set(analysis.full.fields.flatMap((field) => field.values))
  );

  const foundEntities = (
    await Promise.all(
      groupingFields.map((groupingField) => getResultsForGroupingField(groupingField))
    )
  ).flat();

  logger.debug(() => `Found ${foundEntities.length} entities via keyword searches`);

  const system = `${RCA_SYSTEM_PROMPT_BASE}

${RCA_PROMPT_ENTITIES}

${RCA_PROMPT_DEPENDENCIES}`;

  const inputPromptBase = getInputPromptBase({
    entity,
    analysis: analysis.truncated,
    ownPatterns,
    patternsFromOtherEntities,
    searches,
    context,
    kbEntries,
  });

  const foundEntityPrompts = foundEntities.map((foundEntity) => {
    return {
      text: getPromptForFoundEntity(foundEntity),
      id: formatEntity(foundEntity.entity),
    };
  });

  const inputPromptInstructions = getInputPromptInstructions({ entity });

  // don't do more than 10 entities in a response, we'll run out of
  // tokens
  const requests = chunk(foundEntityPrompts, 10).flatMap((texts) =>
    chunkOutputCalls({
      system,
      input: `${inputPromptBase} ${inputPromptInstructions}`,
      texts,
      tokenLimit: 32_000 - 6_000,
    })
  );

  const allRelevantEntityDescriptions = await Promise.all(
    requests.map(async (request) => {
      const outputCompleteEvent = await inferenceClient.output({
        id: 'describe_relevant_entities',
        connectorId,
        system: request.system,
        input: `${inputPromptBase}

# Found entities

${request.texts.map((text) => text.text).join('\n\n')}

${inputPromptInstructions}`,
      });

      return outputCompleteEvent.content;
    })
  );

  return {
    summaries: allRelevantEntityDescriptions,
    foundEntities,
  };

  async function getResultsForGroupingField(
    groupingField: string
  ): Promise<RelatedEntityFromSearchResults[]> {
    const excludeQuery = isEqual([groupingField], entityFields)
      ? `NOT (${groupingField}:"${entity[groupingField]}")`
      : ``;

    const fieldCaps = await esClient.fieldCaps('check_if_grouping_field_exists', {
      fields: [groupingField],
      index,
      index_filter: {
        bool: {
          filter: [...rangeQuery(start, end)],
        },
      },
    });

    if (isEmpty(fieldCaps.fields[groupingField])) {
      return [];
    }

    const keywordSearchResults = await esClient.search(
      'find_related_entities_via_keyword_searches',
      {
        track_total_hits: false,
        index,
        query: {
          bool: {
            must: [...rangeQuery(start, end), ...kqlQuery(excludeQuery)],
            should: [
              {
                multi_match: {
                  query: searches.flatMap((search) => search.fragments).join(' '),
                  fields: '*',
                },
              },
            ],
            minimum_should_match: 1,
          },
        },
        fields: [groupingField],
        collapse: {
          field: groupingField,
        },
        highlight: {
          fields: {
            '*': {},
          },
        },
        _source: false,
        size: 1_000,
      }
    );

    if (!keywordSearchResults.hits.hits.length) {
      logger.debug(() => `No hits: ${JSON.stringify({ entity, groupingField, searches })}`);
      return [];
    }

    logger.trace(
      () =>
        `Hits: ${JSON.stringify({
          entity,
          groupingField,
          searches,
          count: keywordSearchResults.hits.hits.length,
          hits: keywordSearchResults.hits.hits,
        })}`
    );

    const limiter = pLimit(20);

    const groupingFieldAnalysis = await Promise.all(
      keywordSearchResults.hits.hits.map(async (hit) => {
        return limiter(async () => {
          const groupValue = hit.fields![groupingField][0] as string;

          const analysisForGroupingField = await analyzeDocuments({
            esClient,
            start,
            end,
            index,
            kuery: getEntityKuery({
              [groupingField]: groupValue,
            }),
          });

          const analysisWithRelevantValues = {
            ...analysisForGroupingField,
            fields: analysisForGroupingField.fields
              .filter((field) => {
                return !field.empty;
              })
              .map((field) => {
                const valuesFoundInEntity = field.values.filter((value) => {
                  return (
                    allValuesFromEntity.includes(value) ||
                    allValuesFromEntity.some((valueFromEntity) => {
                      return (
                        typeof valueFromEntity === 'string' &&
                        typeof value === 'string' &&
                        (value.includes(valueFromEntity) || valueFromEntity.includes(value))
                      );
                    })
                  );
                });
                return {
                  ...field,
                  values: valuesFoundInEntity,
                };
              }),
          };

          return {
            groupingField,
            key: groupValue,
            highlight: hit.highlight!,
            analysis: sortAndTruncateAnalyzedFields(analysisWithRelevantValues),
          };
        });
      })
    );

    return groupingFieldAnalysis.map(({ key, highlight, analysis: analysisForGroupingField }) => {
      return {
        entity: {
          [groupingField]: key,
        },
        highlight,
        analysis: analysisForGroupingField,
      };
    });
  }
}
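The two-level chunking above is worth spelling out: found entities are first grouped ten at a time, and each group is then split further by `chunkOutputCalls` if it would exceed the token budget. A small self-contained illustration of the first level (sizes and ids invented):

```ts
import { chunk } from 'lodash';

// 25 found-entity prompts become three batches of at most 10 before any
// token-based splitting happens.
const prompts = Array.from({ length: 25 }, (_, i) => ({
  id: `service-${i}`, // placeholder ids
  text: `## Entity: service-${i}`,
}));

const batches = chunk(prompts, 10);
// batches.map((batch) => batch.length) -> [10, 10, 5]
```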
@@ -0,0 +1,159 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import stringify from 'json-stable-stringify';
import pLimit from 'p-limit';
import { RelatedEntityFromSearchResults } from '.';
import {
  RCA_PROMPT_DEPENDENCIES,
  RCA_PROMPT_ENTITIES,
  RCA_SYSTEM_PROMPT_BASE,
} from '../../prompts';
import { RootCauseAnalysisContext } from '../../types';
import { formatEntity } from '../../util/format_entity';
import { getPreviouslyInvestigatedEntities } from '../../util/get_previously_investigated_entities';
import { toBlockquote } from '../../util/to_blockquote';

export interface RelatedEntityDescription {
  entity: Record<string, string>;
  reason: string;
  confidence: string;
}

export async function extractRelatedEntities({
  entity,
  entityReport,
  summaries,
  foundEntities,
  context,
  rcaContext: { events, connectorId, inferenceClient },
}: {
  foundEntities: RelatedEntityFromSearchResults[];
  entity: Record<string, string>;
  entityReport: string;
  summaries: string[];
  context: string;
  rcaContext: Pick<RootCauseAnalysisContext, 'events' | 'connectorId' | 'inferenceClient'>;
}): Promise<{ relatedEntities: RelatedEntityDescription[] }> {
  const system = `${RCA_SYSTEM_PROMPT_BASE}

${RCA_PROMPT_ENTITIES}

${RCA_PROMPT_DEPENDENCIES}`;

  const previouslyInvestigatedEntities = getPreviouslyInvestigatedEntities({ events });

  const previouslyInvestigatedEntitiesPrompt = previouslyInvestigatedEntities.length
    ? `## Previously investigated entities

${previouslyInvestigatedEntities
        .map((prevEntity) => `- ${formatEntity(prevEntity)}`)
        .join('\n')}`
    : '';

  const prompts = summaries.map((summary) => {
    return `
# Investigated entity

${formatEntity(entity)}

# Report

${toBlockquote(entityReport)}

# Related entities report

${toBlockquote(summary)}

${previouslyInvestigatedEntitiesPrompt}

# Context

${context}

# Task

Your current task is to extract relevant entities as a data structure from the
related entities report. Order them by relevance to the investigation, put the
most relevant ones first.
`;
  });

  const limiter = pLimit(5);

  const allEvents = await Promise.all(
    prompts.map(async (input) => {
      const completeEvent = await limiter(() =>
        inferenceClient.output({
          id: 'get_entity_relationships',
          connectorId,
          system,
          input,
          schema: {
            type: 'object',
            properties: {
              related_entities: {
                type: 'array',
                items: {
                  type: 'object',
                  properties: {
                    entity: {
                      type: 'object',
                      properties: {
                        field: {
                          type: 'string',
                        },
                        value: {
                          type: 'string',
                        },
                      },
                      required: ['field', 'value'],
                    },
                    reason: {
                      type: 'string',
                      description: 'Describe why this entity might be relevant. Provide evidence.',
                    },
                    confidence: {
                      type: 'string',
                      description:
                        'Describe how confident you are in your conclusion about this relationship: low, moderate, high',
                    },
                  },
                  required: ['entity', 'reason', 'confidence'],
                },
              },
            },
            required: ['related_entities'],
          } as const,
        })
      );
      return completeEvent.output;
    })
  );

  const foundEntityIds = foundEntities.map(({ entity: foundEntity }) => stringify(foundEntity));

  const relatedEntities = allEvents
    .flat()
    .flatMap((event) => {
      return event.related_entities.map((item) => {
        return {
          entity: { [item.entity.field]: item.entity.value },
          reason: item.reason,
          confidence: item.confidence,
        };
      });
    })
    .filter((item) => {
      return foundEntityIds.includes(stringify(item.entity));
    });

  return {
    relatedEntities,
  };
}
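The final filter above is the guardrail against hallucinated entities: only entities that the keyword searches actually returned survive, and `json-stable-stringify` makes the comparison insensitive to key order. A minimal illustration (entity names invented):

```ts
import stringify from 'json-stable-stringify';

const foundEntityIds = [{ 'service.name': 'cartservice' }].map((e) => stringify(e));

// The LLM may echo a known entity with different key ordering, or invent
// a new one; stable stringification preserves structural equality, so only
// known entities pass the filter.
const fromLlm = { 'service.name': 'cartservice' };
const invented = { 'service.name': 'not-a-real-service' };

foundEntityIds.includes(stringify(fromLlm)); // true
foundEntityIds.includes(stringify(invented)); // false
```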
@@ -0,0 +1,97 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { InferenceClient } from '@kbn/inference-plugin/server';
import { Logger } from '@kbn/logging';
import {
  DocumentAnalysis,
  TruncatedDocumentAnalysis,
} from '@kbn/observability-utils-common/llm/log_analysis/document_analysis';
import { ObservabilityElasticsearchClient } from '@kbn/observability-utils-server/es/client/create_observability_es_client';
import { FieldPatternResultWithChanges } from '@kbn/observability-utils-server/entities/get_log_patterns';
import {
  analyzeFetchedRelatedEntities,
  RelatedEntityFromSearchResults,
} from './analyze_fetched_related_entities';
import {
  RelatedEntityKeywordSearch,
  writeKeywordSearchForRelatedEntities,
} from './write_keyword_searches_for_related_entities';
import { ScoredKnowledgeBaseEntry } from '../get_knowledge_base_entries';

export type { RelatedEntityFromSearchResults };

export async function findRelatedEntities({
  connectorId,
  inferenceClient,
  start,
  end,
  index,
  esClient,
  entity,
  analysis,
  logger,
  context,
  ownPatterns,
  patternsFromOtherEntities,
  kbEntries,
}: {
  connectorId: string;
  inferenceClient: InferenceClient;
  start: number;
  end: number;
  index: string | string[];
  esClient: ObservabilityElasticsearchClient;
  entity: Record<string, string>;
  analysis: {
    truncated: TruncatedDocumentAnalysis;
    full: DocumentAnalysis;
  };
  logger: Logger;
  context: string;
  ownPatterns: FieldPatternResultWithChanges[];
  patternsFromOtherEntities: FieldPatternResultWithChanges[];
  kbEntries: ScoredKnowledgeBaseEntry[];
}): Promise<{
  searches: RelatedEntityKeywordSearch[];
  summaries: string[];
  foundEntities: RelatedEntityFromSearchResults[];
}> {
  const { groupingFields, searches } = await writeKeywordSearchForRelatedEntities({
    connectorId,
    inferenceClient,
    entity,
    analysis: analysis.truncated,
    ownPatterns,
    context,
    kbEntries,
  });

  const { summaries, foundEntities } = await analyzeFetchedRelatedEntities({
    entity,
    connectorId,
    start,
    end,
    esClient,
    index,
    inferenceClient,
    searches,
    groupingFields,
    logger,
    analysis,
    ownPatterns,
    patternsFromOtherEntities,
    context,
    kbEntries,
  });

  return {
    searches,
    summaries,
    foundEntities,
  };
}
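So the exported entry point is a thin two-phase pipeline: one LLM call writes the keyword searches, and the second phase executes and interprets them. A hedged usage sketch, with all inputs assumed to come from the calling investigation step:

```ts
declare const args: Parameters<typeof findRelatedEntities>[0];

const { searches, foundEntities, summaries } = await findRelatedEntities(args);
// `searches` records what was looked for, `foundEntities` the raw matches per
// grouping field, and `summaries` the model's per-batch relationship analysis.
```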
@@ -0,0 +1,199 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { InferenceClient } from '@kbn/inference-plugin/server';
import { TruncatedDocumentAnalysis } from '@kbn/observability-utils-common/llm/log_analysis/document_analysis';
import { FieldPatternResultWithChanges } from '@kbn/observability-utils-server/entities/get_log_patterns';
import { RCA_PROMPT_ENTITIES, RCA_SYSTEM_PROMPT_BASE } from '../../prompts';
import { formatEntity } from '../../util/format_entity';
import { serializeKnowledgeBaseEntries } from '../../util/serialize_knowledge_base_entries';
import { toBlockquote } from '../../util/to_blockquote';
import { ScoredKnowledgeBaseEntry } from '../get_knowledge_base_entries';

const SYSTEM_PROMPT_ADDENDUM = `# Guide: Constructing Keyword Searches to Find Related Entities

When investigating issues like elevated failure rates for a
specific endpoint, you can use the metadata at hand (IP addresses,
URLs, session IDs, tracing IDs, etc.) to build targeted keyword searches.
By extracting meaningful fragments from the data, you can correlate
related services or hosts across distributed systems. Here’s how
you can break down the metadata and format your searches.

## Grouping fields

Define grouping fields for the entities you want to extract. For
instance, "service.name" if you are looking for services, or
"kubernetes.pod.name" if you are looking for pods. Focus
on services, unless you are looking for deployment or
configuration changes.

---

## Key Metadata and Search Format

### Example: Investigating a service failure for \`/api/products\`

You can break down various pieces of metadata into searchable
fragments. For each value, include a short description of its
relationship to the investigation. This value will be used
by the system to determine the relevance of a given entity
that matches the search request.

### 1. **IP Address and Port**
- **Fragments:**
  - \`"10.44.0.11:8080"\`: Full address.
  - \`"10.44.0.11"\`: IP address only.
  - \`"8080"\`: Port number.
- **Appears as:** This IP address and port are referenced as
<ip-field-name> and <port-field-name> in the investigated entity
<entity-name>.

### 2. **Outgoing Request URL**
- **Fragments:**
  - \`"http://called-service/api/product"\`: Full outgoing URL.
  - \`"/api/product*"\`: Endpoint path.
  - \`"called-service"\`: Service name of the upstream dependency.
- **Appears as:** These URL fragments appear as attributes.request.url
in the investigated entity <entity-name>. They could appear as referer
in the upstream dependency.

### 3. **Parent and Span IDs**
- **Fragments:**
  - \`"000aa"\`: Parent ID.
  - \`"000bbb"\`: Span ID.
- **Appears as:** These IDs appear as span.id and parent.id in the
investigated entity <entity-name>. They could be referring to spans
found on upstream or downstream services.

---

## Example Search Format in JSON

To structure your keyword search, format the fragments and their
relationships in a JSON object like this:

\`\`\`json
{
  "groupingFields": [ "service.name" ],
  "searches": [
    {
      "fragments": [
        "10.44.0.11:8080",
        "10.44.0.11",
        "8080"
      ],
      "appearsAs": "This IP address and port are referenced as <ip-field-name> and <port-field-name> in the investigated entity <entity-name>."
    },
    {
      "fragments": [
        "http://<upstream-service>/api/product",
        "/api/product",
        "<upstream-service>"
      ],
      "appearsAs": "These URL fragments appear as attributes.request.url in the investigated entity <entity-name>."
    },
    {
      "fragments": [
        "000aa",
        "000bbb"
      ],
      "appearsAs": "These IDs appear as span.id and parent.id in the investigated entity <entity-name>. They could be referring to spans found on upstream or downstream services."
    }
  ]
}\`;

export interface RelatedEntityKeywordSearch {
  fragments: string[];
  appearsAs: string;
}

export async function writeKeywordSearchForRelatedEntities({
  connectorId,
  inferenceClient,
  entity,
  analysis,
  ownPatterns,
  context,
  kbEntries,
}: {
  connectorId: string;
  inferenceClient: InferenceClient;
  entity: Record<string, string>;
  analysis: TruncatedDocumentAnalysis;
  ownPatterns: FieldPatternResultWithChanges[];
  context: string;
  kbEntries: ScoredKnowledgeBaseEntry[];
}): Promise<{
  groupingFields: string[];
  searches: RelatedEntityKeywordSearch[];
}> {
  const logPatternsPrompt = ownPatterns.length
    ? JSON.stringify(
        ownPatterns.map((pattern) => ({ regex: pattern.regex, sample: pattern.sample }))
      )
    : 'No log patterns found';

  return inferenceClient
    .output({
      id: 'extract_keyword_searches',
      connectorId,
      system: `${RCA_SYSTEM_PROMPT_BASE}

${RCA_PROMPT_ENTITIES}`,
      input: `Your current task is to extract keyword searches
to find related entities to the entity ${formatEntity(entity)},
based on the following context:

## Investigation context
${toBlockquote(context)}

${serializeKnowledgeBaseEntries(kbEntries)}

## Data analysis
${JSON.stringify(analysis)}

## Log patterns

${logPatternsPrompt}

## Instructions
${SYSTEM_PROMPT_ADDENDUM}`,
      schema: {
        type: 'object',
        properties: {
          groupingFields: {
            type: 'array',
            items: {
              type: 'string',
            },
          },
          searches: {
            type: 'array',
            items: {
              type: 'object',
              properties: {
                fragments: {
                  type: 'array',
                  items: {
                    type: 'string',
                  },
                },
                appearsAs: {
                  type: 'string',
                  description:
                    'Describe in what fields these values appear as in the investigated entity. You can mention multiple fields if applicable',
                },
              },
              required: ['fragments', 'appearsAs'],
            },
          },
        },
        required: ['searches', 'groupingFields'],
      } as const,
    })
    .then((event) => event.output);
}
|
|
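To make the contract concrete, a usage sketch (not part of the diff): the entity, context string, and the downstream logging are illustrative, and the analysis/pattern/KB inputs are assumed to come from the surrounding RCA step.

```ts
// Hypothetical caller: run the task for one service and log the searches.
async function sketchKeywordSearches(deps: {
  connectorId: string;
  inferenceClient: InferenceClient;
  analysis: TruncatedDocumentAnalysis;
  ownPatterns: FieldPatternResultWithChanges[];
  kbEntries: ScoredKnowledgeBaseEntry[];
}) {
  const { groupingFields, searches } = await writeKeywordSearchForRelatedEntities({
    ...deps,
    entity: { 'service.name': 'opbeans-java' },
    context: 'Error rate spike on /api/product starting around 11:30 AM',
  });

  // Each search pairs free-text fragments with a description of where they
  // appear; matches can then be grouped by the returned groupingFields.
  searches.forEach((search) => {
    console.log(`${groupingFields.join(',')}: ${search.fragments.join(' OR ')} (${search.appearsAs})`);
  });
}
```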
@@ -0,0 +1,96 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { RCA_SYSTEM_PROMPT_BASE } from '../../prompts';
import { RootCauseAnalysisContext } from '../../types';
import { stringifySummaries } from '../../util/stringify_summaries';

type SignificantEventSeverity = 'info' | 'unusual' | 'warning' | 'critical';

type SignificantEventType = 'alert' | 'slo' | 'event';

export interface SignificantEvent {
  severity: SignificantEventSeverity;
  '@timestamp'?: string;
  description: string;
  type: SignificantEventType;
}

export interface SignificantEventsTimeline {
  events: SignificantEvent[];
}

export async function generateSignificantEventsTimeline({
  report,
  rcaContext,
}: {
  report: string;
  rcaContext: RootCauseAnalysisContext;
}): Promise<SignificantEventsTimeline> {
  const { connectorId, inferenceClient } = rcaContext;

  return await inferenceClient
    .output({
      id: 'generate_timeline',
      system: RCA_SYSTEM_PROMPT_BASE,
      connectorId,
      input: `Your current task is to generate a timeline
of significant events, based on the given RCA report,
according to a structured schema. This timeline will
be presented to the user as a visualization.

${stringifySummaries(rcaContext)}

# Report

${report}
`,
      schema: {
        type: 'object',
        properties: {
          events: {
            type: 'array',
            items: {
              type: 'object',
              properties: {
                timestamp: {
                  type: 'string',
                  description: 'The ISO timestamp of when the event occurred',
                },
                severity: {
                  type: 'string',
                  enum: ['info', 'unusual', 'warning', 'critical'],
                },
                type: {
                  type: 'string',
                  enum: ['alert', 'slo', 'event'],
                },
                description: {
                  type: 'string',
                  description: 'A description of the event',
                },
              },
              required: ['severity', 'description'],
            },
          },
        },
        required: ['events'],
      } as const,
    })
    .then((timelineCompleteEvent) => {
      return {
        events: timelineCompleteEvent.output.events.map((event) => {
          return {
            '@timestamp': event.timestamp,
            severity: event.severity,
            type: event.type ?? 'event',
            description: event.description,
          };
        }),
      };
    });
}
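A consumption sketch (not part of the diff), assuming `report` and `rcaContext` are in scope from the surrounding process:

```ts
// Render the structured timeline as plain text. Only severity and
// description are required by the schema, so timestamp and type may be
// missing on individual events.
const timeline = await generateSignificantEventsTimeline({ report, rcaContext });

for (const event of timeline.events) {
  console.log(
    `[${event.severity}] ${event['@timestamp'] ?? 'undated'} (${event.type}): ${event.description}`
  );
}
```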
@@ -0,0 +1,185 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { ShortIdTable } from '@kbn/observability-ai-assistant-plugin/common';
import { decode, encode } from 'gpt-tokenizer';
import { orderBy, sumBy } from 'lodash';
import { RCA_SYSTEM_PROMPT_BASE } from '../../prompts';
import { RootCauseAnalysisContext } from '../../types';
import { formatEntity } from '../../util/format_entity';
import { toBlockquote } from '../../util/to_blockquote';

export interface ScoredKnowledgeBaseEntry {
  id: string;
  text: string;
  tokens: number;
  score: number;
  truncated?: {
    tokens: number;
    text: string;
  };
}

export async function getKnowledgeBaseEntries({
  entity,
  context,
  rcaContext,
  maxTokens: maxTokensForEntries,
}: {
  entity: Record<string, string>;
  context: string;
  rcaContext: RootCauseAnalysisContext;
  maxTokens: number;
}): Promise<ScoredKnowledgeBaseEntry[]> {
  const response = await rcaContext.observabilityAIAssistantClient.recall({
    queries: [
      ...Object.values(entity).map((value) => ({ text: value, boost: 3 })),
      { text: context },
    ],
    limit: {
      tokenCount: Number.MAX_VALUE,
    },
  });

  const { inferenceClient, connectorId } = rcaContext;

  const shortIdTable = new ShortIdTable();

  const system = RCA_SYSTEM_PROMPT_BASE;

  const input = `Re-order the attached documents, based on relevance to the context.
Score them between 1 and 5, based on their relative relevance to each other. The
most relevant doc should be scored 5, and the least relevant doc should be scored
1.

# Entity

${formatEntity(entity)}

# Context

${toBlockquote(context)}
`;

  const maxTokensForScoring = rcaContext.tokenLimit - encode(system + input).length - 1_000;

  const entriesWithTokens = response.map((entry) => {
    return {
      id: entry.id,
      text: entry.text,
      tokens: encode(entry.text),
    };
  });

  const totalTokenCount = sumBy(entriesWithTokens, (entry) => entry.tokens.length);

  const truncatedEntriesWithShortIds = entriesWithTokens.map((entry) => {
    // Each entry gets a share of the scoring budget proportional to its length.
    const tokensForEntry = Math.floor(
      (entry.tokens.length / totalTokenCount) * maxTokensForScoring
    );

    const truncatedText = decode(entry.tokens.slice(0, tokensForEntry));
    const isTruncated = tokensForEntry < entry.tokens.length;

    return {
      id: entry.id,
      tokens: entry.tokens,
      shortId: shortIdTable.take(entry.id),
      text: entry.text,
      truncatedText,
      isTruncated,
    };
  });

  const scoredEntries = await inferenceClient.output({
    id: 'score_entries',
    connectorId,
    system: RCA_SYSTEM_PROMPT_BASE,
    input: `${input}

${truncatedEntriesWithShortIds
  .map((entry) => {
    return `# ID: ${entry.shortId}

## Text (${entry.isTruncated ? `truncated` : `not truncated`})

${toBlockquote(entry.truncatedText)}
`;
  })
  .join('\n\n')}
`,
    stream: false,
    schema: {
      type: 'object',
      properties: {
        docs: {
          type: 'array',
          items: {
            type: 'object',
            properties: {
              score: {
                type: 'number',
                description:
                  'A score between 1 and 5, with 5 being most relevant, and 1 being least relevant',
              },
              id: {
                type: 'string',
              },
            },
            required: ['score', 'id'],
          },
        },
      },
      required: ['docs'],
    },
  } as const);

  const scoresById = new Map(scoredEntries.output.docs.map((doc) => [doc.id, doc.score]));

  const entriesWithScore = truncatedEntriesWithShortIds.map((entry) => {
    const score = scoresById.get(entry.shortId) ?? 0;
    return {
      ...entry,
      score,
    };
  });

  const sortedEntries = orderBy(entriesWithScore, (entry) => entry.score, 'desc');

  const returnedEntries: ScoredKnowledgeBaseEntry[] = [];

  let tokensLeft = maxTokensForEntries;

  sortedEntries.forEach((entry) => {
    if (entry.tokens.length <= tokensLeft) {
      returnedEntries.push({
        id: entry.id,
        text: entry.text,
        tokens: entry.tokens.length,
        score: entry.score,
      });
      // Deduct this entry from the remaining budget so the returned entries
      // stay within maxTokens in total.
      tokensLeft -= entry.tokens.length;
      return;
    }

    const tokensToTake = tokensLeft;
    if (tokensToTake > 0) {
      const tookTokens = entry.tokens.slice(0, tokensToTake);
      tokensLeft -= tookTokens.length;
      returnedEntries.push({
        id: entry.id,
        text: entry.text,
        tokens: entry.tokens.length,
        score: entry.score,
        truncated: {
          text: decode(tookTokens),
          tokens: tookTokens.length,
        },
      });
    }
  });

  return returnedEntries;
}
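The scoring step packs every recalled entry into one prompt by giving each entry a token budget proportional to its size. A self-contained sketch of that allocation, using the same `gpt-tokenizer` helpers:

```ts
import { decode, encode } from 'gpt-tokenizer';

// Each text gets floor(share-of-total * budget) tokens, so the truncated
// texts together stay within the scoring budget.
function truncateProportionally(texts: string[], budget: number): string[] {
  const tokenized = texts.map((text) => encode(text));
  const total = tokenized.reduce((sum, tokens) => sum + tokens.length, 0);

  return tokenized.map((tokens) =>
    decode(tokens.slice(0, Math.floor((tokens.length / total) * budget)))
  );
}
```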
@@ -0,0 +1,268 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { getEntityKuery } from '@kbn/observability-utils-common/entities/get_entity_kuery';
import { sortAndTruncateAnalyzedFields } from '@kbn/observability-utils-common/llm/log_analysis/sort_and_truncate_analyzed_fields';
import { analyzeDocuments } from '@kbn/observability-utils-server/entities/analyze_documents';
import { getDataStreamsForEntity } from '@kbn/observability-utils-server/entities/get_data_streams_for_entity';
import { getAlertsForEntity } from '@kbn/observability-utils-server/entities/signals/get_alerts_for_entity';
import { getSlosForEntity } from '@kbn/observability-utils-server/entities/signals/get_slos_for_entity';
import { ObservabilityElasticsearchClient } from '@kbn/observability-utils-server/es/client/create_observability_es_client';
import { RootCauseAnalysisContext } from '../../types';
import { stringifySummaries } from '../../util/stringify_summaries';
import { analyzeLogPatterns } from '../analyze_log_patterns';
import { describeEntity } from '../describe_entity';
import { describeLogPatterns } from '../describe_log_patterns';
import { findRelatedEntities } from '../find_related_entities';
import { extractRelatedEntities } from '../find_related_entities/extract_related_entities';
import { writeEntityInvestigationReport } from '../write_entity_investigation_report';
import { EntityInvestigation } from './types';
import { getKnowledgeBaseEntries } from '../get_knowledge_base_entries';

export type { EntityInvestigation };

export interface EntityInvestigationParameters {
  entity: Record<string, string>;
  rcaContext: RootCauseAnalysisContext;
  context: string;
}

export async function investigateEntity(
  parameters: EntityInvestigationParameters
): Promise<EntityInvestigation | undefined> {
  const {
    entity,
    rcaContext,
    rcaContext: {
      inferenceClient,
      connectorId,
      start,
      end,
      esClient,
      logger: parentLogger,
      indices,
    },
    context,
  } = parameters;
  const kuery = getEntityKuery(entity);

  const logger = parentLogger.get('investigateEntity');

  logger.debug(() => `Investigating entity: ${JSON.stringify(parameters.entity)}`);

  const kbPromise = getKnowledgeBaseEntries({
    entity,
    context,
    rcaContext,
    maxTokens: 4_000,
  }).catch((error) => {
    logger.error(`Could not fetch entries from knowledge base`);
    logger.error(error);
    return [];
  });

  const [{ dataStreams }, alerts, slos] = await getSignals({ ...parameters, kuery });

  logger.debug(
    () =>
      `Signals for entity ${JSON.stringify(entity)}: ${dataStreams.length} data streams, ${
        alerts.length
      } alerts, ${slos.length} slos`
  );

  if (!dataStreams.length) {
    return undefined;
  }

  const fullAnalysis = await analyzeDataStreamsForEntity({
    start,
    end,
    esClient,
    kuery,
    dataStreams,
  });

  const truncatedAnalysis = sortAndTruncateAnalyzedFields(fullAnalysis);

  const kbEntries = await kbPromise;

  const { ownPatterns, patternsFromOtherEntities } = await analyzeLogPatterns({
    allAnalysis: [{ index: dataStreams, analysis: truncatedAnalysis }],
    entity,
    system: stringifySummaries(rcaContext),
    cutoff: {
      significance: 'high',
    },
    rcaContext,
    kbEntries,
  });

  logger.trace(
    () => `Analyzed log patterns: ${JSON.stringify({ ownPatterns, patternsFromOtherEntities })}`
  );

  const entityReportPromise = Promise.all([
    describeEntity({
      inferenceClient,
      analysis: truncatedAnalysis,
      connectorId,
      contextForEntityInvestigation: context,
      entity,
      ownPatterns,
      kbEntries,
    }),
    describeLogPatterns({
      analysis: truncatedAnalysis,
      connectorId,
      contextForEntityInvestigation: context,
      entity,
      inferenceClient,
      ownPatterns,
      patternsFromOtherEntities,
      kbEntries,
    }),
  ]).then(([entityDescription, logPatternDescription]) => {
    return writeEntityInvestigationReport({
      connectorId,
      inferenceClient,
      entityDescription,
      logPatternDescription,
      contextForEntityInvestigation: context,
      entity,
    }).then((report) => {
      return {
        description: entityDescription,
        logPatternDescription,
        report,
      };
    });
  });

  const [entityReport, relatedEntitiesResults] = await Promise.all([
    entityReportPromise,
    findRelatedEntities({
      connectorId,
      end,
      entity,
      esClient,
      index: indices.logs,
      inferenceClient,
      logger,
      start,
      context,
      analysis: {
        full: fullAnalysis,
        truncated: truncatedAnalysis,
      },
      ownPatterns,
      patternsFromOtherEntities,
      kbEntries,
    }).then(async ({ searches, summaries, foundEntities }) => {
      const report = await entityReportPromise;

      const { relatedEntities } = await extractRelatedEntities({
        entityReport: report.report,
        summaries,
        entity,
        foundEntities,
        context,
        rcaContext,
      });

      return {
        relatedEntities,
        foundEntities,
        searches,
        summaries,
      };
    }),
  ]);

  return {
    entity,
    summary: [
      entityReport.description,
      entityReport.logPatternDescription.content,
      entityReport.report,
    ].join('\n\n'),
    relatedEntities: relatedEntitiesResults.relatedEntities,
    attachments: {
      alerts,
      slos,
      analysis: truncatedAnalysis,
      ownPatterns,
      patternsFromOtherEntities,
      searches: relatedEntitiesResults.searches,
      relatedEntitiesSummaries: relatedEntitiesResults.summaries,
      kbEntries,
    },
  };
}

async function getSignals({
  entity,
  kuery,
  rcaContext: { start, end, esClient, rulesClient, alertsClient, indices, spaceId },
}: {
  kuery: string;
  entity: Record<string, unknown>;
  rcaContext: Pick<
    RootCauseAnalysisContext,
    'start' | 'end' | 'esClient' | 'rulesClient' | 'alertsClient' | 'indices' | 'spaceId'
  >;
}) {
  return await Promise.all([
    getDataStreamsForEntity({
      esClient,
      kuery,
      index: indices.logs.concat(indices.traces),
    }),
    getAlertsForEntity({ entity, rulesClient, alertsClient, start, end, size: 10 }).then(
      (alertsResponse) => {
        return alertsResponse.hits.hits.map((hit) => hit._source!);
      }
    ),
    getSlosForEntity({
      entity,
      start,
      end,
      esClient,
      size: 1000,
      sloSummaryIndices: indices.sloSummaries,
      spaceId,
    }).then((slosResponse) => {
      return slosResponse.hits.hits.map((hit) => hit._source);
    }),
  ]);
}

async function analyzeDataStreamsForEntity({
  start,
  end,
  dataStreams,
  esClient,
  kuery,
}: {
  start: number;
  end: number;
  kuery: string;
  dataStreams: string[];
  esClient: ObservabilityElasticsearchClient;
}) {
  const analysis = await analyzeDocuments({
    esClient,
    start,
    end,
    index: dataStreams,
    kuery,
  });

  return {
    ...analysis,
    fields: analysis.fields.filter((field) => !field.empty),
  };
}
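A sketch of invoking a single investigation (not part of the diff); `rcaContext` is assumed to be assembled by the caller as described by `RootCauseAnalysisContext` in types.ts, and the entity and context string are illustrative:

```ts
const investigation = await investigateEntity({
  entity: { 'service.name': 'opbeans-java' },
  context: 'Investigating elevated error rates reported by an SLO burn rate alert',
  rcaContext,
});

if (!investigation) {
  // No data streams matched the entity, so there was nothing to analyze.
} else {
  rcaContext.logger.debug(
    `Found ${investigation.relatedEntities.length} related entities`
  );
}
```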
@@ -0,0 +1,22 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { formatEntity } from '../../util/format_entity';
import { toBlockquote } from '../../util/to_blockquote';

export const getInvestigateEntityTaskPrompt = ({
  entity,
  contextForEntityInvestigation,
}: {
  entity: Record<string, string>;
  contextForEntityInvestigation: string;
}) => `## Entity-Based Investigation: Task Guide

In the investigation process, you are currently investigating the entity
${formatEntity(entity)}. The context given for this investigation is:

${toBlockquote(contextForEntityInvestigation)}`;
@@ -0,0 +1,31 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import type { ParsedTechnicalFields } from '@kbn/rule-registry-plugin/common';
import type { TruncatedDocumentAnalysis } from '@kbn/observability-utils-common/llm/log_analysis/document_analysis';
import type { AnalyzeLogPatternOutput } from '../analyze_log_patterns';
import type { RelatedEntityDescription } from '../find_related_entities/extract_related_entities';
import type { RelatedEntityKeywordSearch } from '../find_related_entities/write_keyword_searches_for_related_entities';
import type { ScoredKnowledgeBaseEntry } from '../get_knowledge_base_entries';

export interface EntityInvestigation {
  entity: Record<string, string>;
  summary: string;
  relatedEntities: RelatedEntityDescription[];
  attachments: {
    analysis: TruncatedDocumentAnalysis;
    slos: Array<
      Record<string, any> & {
        status: 'VIOLATED' | 'DEGRADED' | 'HEALTHY' | 'NO_DATA';
      }
    >;
    alerts: ParsedTechnicalFields[];
    searches: RelatedEntityKeywordSearch[];
    relatedEntitiesSummaries: string[];
    kbEntries: ScoredKnowledgeBaseEntry[];
  } & AnalyzeLogPatternOutput;
}
@@ -0,0 +1,239 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { MessageRole } from '@kbn/inference-common';
import { RCA_OBSERVE_TOOL_NAME } from '@kbn/observability-ai-common/root_cause_analysis';
import { RCA_PROMPT_CHANGES, RCA_PROMPT_ENTITIES, RCA_SYSTEM_PROMPT_BASE } from '../../prompts';
import { ObservationToolMessage, RootCauseAnalysisContext } from '../../types';
import { formatEntity } from '../../util/format_entity';
import { getPreviouslyInvestigatedEntities } from '../../util/get_previously_investigated_entities';
import { stringifySummaries } from '../../util/stringify_summaries';
import { toBlockquote } from '../../util/to_blockquote';
import { EntityInvestigation } from '../investigate_entity/types';

const INITIAL_OBSERVATION_TASK_GUIDE = `Your current task is to write observations based on the initial context. You
should acknowledge the context briefly, and mention key observations from the
initial context.

Then, briefly describe what change you are looking for. Are the symptoms:

- rapid, or gradual onset?
- subtle or pronounced?

If possible, mention the time of the change.

When considering the initial context, reason about relevant changes to observe,
such as short-lived versus persistent changes or singular events, like scale
events, rollouts, or configuration changes.

After, taking into account the capabilities you have, plan for next steps.

Describe the next step, which is to investigate the entity found in the initial
context. Only mention the entity (as a field/value). Do not mention any
additional filters.

Be brief, accurate, and critical.`;

const INVESTIGATION_ADDENDUM = `
**Task Guide: Observe the investigation results**

You will receive one or more investigations. These investigations mention:
- a general characterization of the entity based on its data
- relevant log patterns
- other signals, like SLOs or alerts
- possibly related entities, and investigation suggestions

First, you should briefly acknowledge the initial context of the investigation
and where it stands.

Next, you should note key observations from the investigations, and how they relate
to the ongoing investigation.

After, you should generate a timeline of significant events. For this timeline,
include events from previous observations. Additionally, include significant
events from the inspected investigations. Group events together in a topic
if needed. Significant events are things like: an increase in errors, deployment
events, a drop to zero for access logs, etc. In most cases, you do not want to
mention individual log messages, unless it is a particularly significant event
by itself.

For each event, mention:

- the timestamp of the event
- the nature of the change, if applicable
- data from the event, such as specific log patterns, alerts or slos
- the meaning of the event and how it is related to the initial context

Do not include:
- the time range from the investigation itself (start/end)
- other events that occurred during the investigation itself, like running
log analysis or other patterns

## Correlating significant events

When correlating significant events, pay close attention to the timestamp of
the mentioned change, and how it correlates to the timestamp of the change you
want to correlate it to, such as the start time of an alert. An alert might be
delayed, but if you see many changes around a specific timestamp, and some of
them being significantly earlier, or later, the latter group is likely not
relevant.

## Context and reasoning

Next, use the timeline of events and the new observations to revise your
analysis of the initial context and the ongoing investigation. Reason about
how changes could be related: are they close in time, or far removed, compared
to others? Is the type of change similar? Is the magnitude of the change similar?`;

const SUGGEST_NEXT_STEPS_PROMPT = `
Next, consider next steps. It's always important to contextualize the significant
events in the initial context of the investigation. Focus on your strongest pieces of
evidence. Your observations should be related to finding out the cause of the
initial context of the investigation - you should not concern yourself with the
impact on _other_ entities.

Suggest to conclude the process when:

- there is a clear and obvious root cause
- you have investigated more than 10 entities
- OR you cannot find any unhealthy entities
- there are no more entities to investigate

If the conclusion is you need to continue your investigation, mention the entities
that should be investigated. Do this only if there is a significant chance one of
the related entities will give you new insights into the root cause (instead of
just the impact). DO NOT investigate an entity more than once.`;

const CONCLUDE_PROCESS_PROMPT = `
You must suggest to conclude the process and write the final report, as your
capabilities do not allow you to investigate more entities.`;

function getInitialPrompts(initialContext: string) {
  return {
    system: `${RCA_SYSTEM_PROMPT_BASE}

${RCA_PROMPT_ENTITIES}

${RCA_PROMPT_CHANGES}`,
    input: `## Context

${initialContext}

${INITIAL_OBSERVATION_TASK_GUIDE}`,
  };
}

function getObserveInvestigationsPrompts({
  investigations,
  summaries,
  rcaContext,
}: {
  investigations: EntityInvestigation[];
  summaries: ObservationStepSummary[];
  rcaContext: RootCauseAnalysisContext;
}) {
  const previouslyInvestigatedEntities = getPreviouslyInvestigatedEntities(rcaContext);

  const canContinue =
    summaries.length <= 5 &&
    investigations.filter((investigation) => 'summary' in investigation).length <= 10;

  const investigationsPrompt = `Observe the following investigations that recently concluded:

${investigations
  .map((investigation, index) => {
    return `## ${index + 1}: investigation of ${formatEntity(investigation.entity)}

${toBlockquote(investigation.summary)}

${
  investigation.relatedEntities.length
    ? `### Relationships to ${formatEntity(investigation.entity)}

${toBlockquote(JSON.stringify(investigation.relatedEntities))}

`
    : ``
}
`;
  })
  .join('\n\n')}

${INVESTIGATION_ADDENDUM}

${
  canContinue
    ? `${SUGGEST_NEXT_STEPS_PROMPT}

${
  previouslyInvestigatedEntities.length
    ? `The following entities have been investigated previously.
Do not investigate them again:

${previouslyInvestigatedEntities.map((entity) => `- ${JSON.stringify(entity)}`).join('\n')}`
    : ``
}

`
    : CONCLUDE_PROCESS_PROMPT
}

`;

  const systemPrompt = `${RCA_SYSTEM_PROMPT_BASE}

${RCA_PROMPT_ENTITIES}

${stringifySummaries(rcaContext)}`;

  return {
    system: systemPrompt,
    input: investigationsPrompt,
  };
}

export interface ObservationStepSummary {
  investigations: EntityInvestigation[];
  content: string;
}

export function observeInvestigationResults({
  rcaContext,
  rcaContext: { logger, events, initialContext, inferenceClient, connectorId },
  investigations,
}: {
  rcaContext: RootCauseAnalysisContext;
  investigations: EntityInvestigation[];
}): Promise<ObservationStepSummary> {
  const summaries = events
    .filter((event): event is ObservationToolMessage => {
      return event.role === MessageRole.Tool && event.name === RCA_OBSERVE_TOOL_NAME;
    })
    .map((event) => event.data);

  logger.debug(
    () =>
      `Observing ${investigations.length} investigations (${summaries.length} previous summaries)`
  );

  const { system, input } = investigations.length
    ? getObserveInvestigationsPrompts({ summaries, investigations, rcaContext })
    : getInitialPrompts(initialContext);

  return inferenceClient
    .output({
      id: 'observe',
      system,
      input,
      connectorId,
    })
    .then((outputCompleteEvent) => {
      return {
        content: outputCompleteEvent.content,
        investigations,
      };
    });
}
@@ -0,0 +1,84 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { InferenceClient } from '@kbn/inference-plugin/server';
import { RCA_PROMPT_SIGNIFICANT_EVENTS, RCA_SYSTEM_PROMPT_BASE } from '../../prompts';
import { formatEntity } from '../../util/format_entity';
import { toBlockquote } from '../../util/to_blockquote';
import { LogPatternDescription } from '../describe_log_patterns';
import { getInvestigateEntityTaskPrompt } from '../investigate_entity/prompts';

export async function writeEntityInvestigationReport({
  inferenceClient,
  connectorId,
  entity,
  contextForEntityInvestigation,
  entityDescription,
  logPatternDescription,
}: {
  inferenceClient: InferenceClient;
  connectorId: string;
  entity: Record<string, string>;
  contextForEntityInvestigation: string;
  entityDescription: string;
  logPatternDescription: LogPatternDescription;
}): Promise<string> {
  const system = RCA_SYSTEM_PROMPT_BASE;

  const shouldGenerateTimeline = logPatternDescription.interestingPatternCount > 0;

  let input = `${getInvestigateEntityTaskPrompt({ entity, contextForEntityInvestigation })}

## Entity description

${toBlockquote(entityDescription)}

## Log pattern analysis

${toBlockquote(logPatternDescription.content)}

# Current task

Your current task is to write a report on the investigation into ${formatEntity(entity)}.
The log pattern analysis and entity description will be added to your report (at the
top), so you don't need to repeat anything in it.`;

  if (shouldGenerateTimeline) {
    input += `\n\n${RCA_PROMPT_SIGNIFICANT_EVENTS}`;
  }

  input += `\n\n## Context and reasoning

Reason about the role that the entity plays in the investigation, given the context.
Mention evidence (hard pieces of data) when reasoning.

Do not suggest next steps - this will happen in a follow-up task.`;

  if (shouldGenerateTimeline) {
    input += `\n\n## Format

Your reply should only contain two sections:

- Timeline of significant events
- Context and reasoning
`;
  } else {
    input += `\n\n## Format

Your reply should only contain one section:

- Context and reasoning
`;
  }

  const response = await inferenceClient.output({
    id: 'generate_entity_report',
    connectorId,
    input,
    system,
  });

  return response.content;
}
@@ -0,0 +1,191 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { RCA_PROMPT_TIMELINE_GUIDE, RCA_SYSTEM_PROMPT_BASE } from '../../prompts';
import { RootCauseAnalysisContext } from '../../types';
import { stringifySummaries } from '../../util/stringify_summaries';

const SYSTEM_PROMPT_ADDENDUM = `
# Guide: Writing a Root Cause Analysis (RCA) Report

A Root Cause Analysis (RCA) report is the final step in a thorough
investigation. Its purpose is to provide a clear, evidence-backed explanation of
the underlying cause of an issue, as well as the impact. Even if no definitive
root cause is identified, the report should reflect the findings, the hypotheses
considered, and why certain assumptions were rejected. This guide will help
structure an RCA that distinguishes between cause and effect, organizes
evidence, and presents a timeline of key events.

---

## 1. Introduction

Start by summarizing the reason for the investigation. Provide a brief overview
of the incident, the affected services or entities, and the initial alerts or
issues that triggered the investigation.

- **What prompted the investigation?**
- **Which entities were investigated?**
- **Was there a specific hypothesis proposed at the outset?**

### Example:
- **Overview:** This RCA report investigates the elevated error rates in
\`myservice\` and its downstream dependencies, first identified through an SLO
breach for the \`/api/submit\` endpoint. The investigation considered multiple
entities and possible causes, including resource exhaustion and upstream service
failures.

---

## 2. Investigation Summary

Summarize the key steps of the investigation, outlining:
- **What hypotheses were proposed and why.**
- **Which entities were investigated (e.g., \`myservice\`, \`myotherservice\`,
\`notification-service\`).**
- **Which hypotheses were discarded and why.**

For each hypothesis, present the supporting or contradicting evidence.

- **Strong Indicators:** Clear, repeated evidence pointing toward or against a
hypothesis.
- **Weak Indicators:** Inconsistent or ambiguous data that did not provide
conclusive answers.

#### Example Format:
- **Hypothesis 1:** Resource exhaustion in \`myservice\` caused elevated error
rates.
  - **Evidence:**
    - **Strong:** Memory usage exceeded 90% during the incident.
    - **Weak:** CPU usage remained stable, making resource exhaustion a partial
explanation.

- **Hypothesis 2:** Upstream latency from \`myotherservice\` caused delays.
  - **Evidence:**
    - **Strong:** API logs showed frequent retries and timeouts from
\`myotherservice\`.
    - **Weak:** No errors were observed in \`myotherservice\` logs, suggesting an
issue isolated to \`myservice\`.

---

## 3. Cause and Effect

Differentiate between the **cause** (what initiated the issue) and the
**effect** (the impact or symptoms seen across the system). The cause should
focus on the root, while the effect describes the wider system response or
failure.

- **Root Cause:** Identify the underlying problem, supported by strong evidence.
If no root cause is found, clearly state that the investigation did not lead to
a conclusive root cause.

- **Impact:** Describe the downstream effects on other services, performance
degradation, or SLO violations.

#### Example:
- **Cause:** The root cause of the elevated error rate was identified as a
memory leak in \`myservice\` that gradually led to resource exhaustion.
- **Effect:** This led to elevated latency and increased error rates at the
\`/api/submit\` endpoint, impacting downstream services like
\`notification-service\` that rely on responses from \`myservice\`.

---

## 4. Evidence for Root Cause

Present a structured section summarizing all the evidence that supports the
identified root cause. If no root cause is identified, outline the most
significant findings that guided or limited the investigation.

- **Log Patterns:** Describe any abnormal log patterns observed, including
notable change points.
- **Alerts and SLOs:** Mention any alerts or breached SLOs that were triggered,
including their relevance to the investigation.
- **Data Analysis:** Include any data trends or patterns that were analyzed
(e.g., resource usage spikes, network traffic).

#### Example:
- **Memory Usage:** Logs showed a steady increase in memory consumption starting
at 10:00 AM, peaking at 12:00 PM, where memory usage surpassed 90%, triggering
the alert.
- **Error Rate Logs:** Error rates for \`/api/submit\` began increasing around
11:30 AM, correlating with the memory pressure in \`myservice\`.
- **API Logs:** \`myotherservice\` API logs showed no internal errors, ruling out
an upstream dependency as the primary cause.

---

## 5. Proposed Impact

Even if the root cause is clear, it is important to mention the impact of the
issue on the system, users, and business operations. This includes:
- **Affected Services:** Identify the services impacted (e.g., downstream
dependencies).
- **Performance Degradation:** Describe any SLO breaches or performance
bottlenecks.
- **User Impact:** Explain how users or clients were affected (e.g., higher
latency, failed transactions).

#### Example:
- **Impact:** The memory leak in \`myservice\` caused service degradation over a
2-hour window. This affected \`/api/submit\`, causing delays and failed
requests, ultimately impacting user-facing services relying on that endpoint.

---

## 6. Timeline of Significant Events

${RCA_PROMPT_TIMELINE_GUIDE}

---

## 7. Conclusion and Next Steps

Summarize the conclusions of the investigation:
- If a root cause was identified, confirm it with the strongest supporting
evidence.
- If no root cause was found, state that clearly and suggest areas for further
investigation or monitoring.

Finally, outline the next steps:
- **Fixes or Mitigations:** Recommend any immediate actions (e.g., patch
deployment, configuration changes).
- **Monitoring Improvements:** Suggest new alerts or monitoring metrics based on
lessons learned.
- **Further Investigations:** If necessary, propose any follow-up investigations
to gather more evidence.

#### Example:
- **Conclusion:** The root cause of the incident was a memory leak in
\`myservice\`, leading to resource exhaustion and elevated error rates at
\`/api/submit\`. The leak has been patched, and monitoring has been improved to
detect memory spikes earlier.
- **Next Steps:** Monitor memory usage for the next 24 hours to ensure no
recurrence. Investigate adding a memory ceiling for \`myservice\` to prevent
future resource exhaustion.`;

export async function writeFinalReport({
  rcaContext,
}: {
  rcaContext: RootCauseAnalysisContext;
}): Promise<string> {
  const { inferenceClient, connectorId } = rcaContext;

  return await inferenceClient
    .output({
      id: 'write_final_report',
      connectorId,
      system: `${RCA_SYSTEM_PROMPT_BASE}

${SYSTEM_PROMPT_ADDENDUM}`,
      input: `Write the RCA report, based on the observations.

${stringifySummaries(rcaContext)}`,
    })
    .then((event) => event.content);
}
@@ -0,0 +1,77 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import {
  RCA_END_PROCESS_TOOL_NAME,
  RCA_INVESTIGATE_ENTITY_TOOL_NAME,
  RCA_OBSERVE_TOOL_NAME,
} from '@kbn/observability-ai-common/root_cause_analysis/tool_names';

export const RCA_TOOLS = {
  [RCA_OBSERVE_TOOL_NAME]: {
    description: `Request an observation from another agent on
the results of the returned investigations. The title should
cover key new observations from the initial context or
completed investigations, not anything about next steps.`,
    schema: {
      type: 'object',
      properties: {
        title: {
          type: 'string',
          description: `A short title w/ the key new observations that will be displayed on top of a collapsible panel.`,
        },
      },
      required: ['title'],
    },
  },
  [RCA_END_PROCESS_TOOL_NAME]: {
    description: `End the RCA process by requesting a
written report from another agent`,
    schema: {
      type: 'object',
      properties: {
        endProcess: {
          type: 'boolean',
        },
      },
      required: ['endProcess'],
    },
  },
  [RCA_INVESTIGATE_ENTITY_TOOL_NAME]: {
    description: `Investigate an entity`,
    schema: {
      type: 'object',
      properties: {
        context: {
          type: 'string',
          description: `Context that will be used in the investigation of the entity. Mention the initial context
of the investigation, a very short summary of the last observation if applicable, and pieces
of data that can be relevant for the investigation into the entity, such as timestamps or
keywords`,
        },
        entity: {
          type: 'object',
          description: `The entity you want to investigate, such as a service. Use
the Elasticsearch field names and values. For example, for services, use
the following structure: ${JSON.stringify({
            entity: { field: 'service.name', value: 'opbeans-java' },
          })}`,
          properties: {
            field: {
              type: 'string',
            },
            value: {
              type: 'string',
            },
          },
          required: ['field', 'value'],
        },
      },
      required: ['context', 'entity'],
    },
  },
} as const;
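For illustration, a sketch (not part of the diff) of the arguments a model could emit against the investigate-entity schema above; the tool name literal and the surrounding call envelope depend on the `@kbn/inference-common` wire format and are assumptions here:

```ts
// Hypothetical tool-call arguments matching the
// RCA_INVESTIGATE_ENTITY_TOOL_NAME schema; values are illustrative.
const exampleArguments = {
  context:
    'SLO burn rate alert for /api/submit at 11:32; upstream retries suggest inspecting the called service',
  entity: { field: 'service.name', value: 'opbeans-java' },
};
```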
@@ -0,0 +1,101 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import {
  ToolMessage,
  UserMessage,
  ToolCallsOf,
  ToolChoice,
  AssistantMessageOf,
} from '@kbn/inference-common';
import { InferenceClient } from '@kbn/inference-plugin/server';
import { Logger } from '@kbn/logging';
import { AlertsClient } from '@kbn/rule-registry-plugin/server';
import { RulesClient } from '@kbn/alerting-plugin/server';
import { ObservabilityAIAssistantClient } from '@kbn/observability-ai-assistant-plugin/server';
import { ObservabilityElasticsearchClient } from '@kbn/observability-utils-server/es/client/create_observability_es_client';
import {
  RCA_END_PROCESS_TOOL_NAME,
  RCA_INVESTIGATE_ENTITY_TOOL_NAME,
  RCA_OBSERVE_TOOL_NAME,
} from '@kbn/observability-ai-common/root_cause_analysis';
import { ObservationStepSummary } from './tasks/observe_investigation_results';
import { EntityInvestigation } from './tasks/investigate_entity';
import { SignificantEventsTimeline } from './tasks/generate_timeline';
import { RCA_TOOLS } from './tools';

export type EndProcessToolMessage = ToolMessage<
  typeof RCA_END_PROCESS_TOOL_NAME,
  {
    report: string;
    timeline: SignificantEventsTimeline;
  }
>;

export type ObservationToolMessage = ToolMessage<
  typeof RCA_OBSERVE_TOOL_NAME,
  {
    content: string;
  },
  ObservationStepSummary
>;

export type InvestigateEntityToolMessage = ToolMessage<
  typeof RCA_INVESTIGATE_ENTITY_TOOL_NAME,
  Pick<EntityInvestigation, 'entity' | 'summary' | 'relatedEntities'>,
  { attachments: EntityInvestigation['attachments'] }
>;

export type ToolErrorMessage = ToolMessage<
  'error',
  {
    error: {
      message: string;
    };
  }
>;

export type RootCauseAnalysisEvent =
  | RootCauseAnalysisToolMessage
  | ToolErrorMessage
  | UserMessage
  | AssistantMessageOf<{
      tools: typeof RCA_TOOLS;
      toolChoice?: ToolChoice<keyof typeof RCA_TOOLS>;
    }>;

export type RootCauseAnalysisToolRequest<
  TToolName extends keyof typeof RCA_TOOLS = keyof typeof RCA_TOOLS
> = ToolCallsOf<{
  tools: Pick<typeof RCA_TOOLS, TToolName>;
}>['toolCalls'][number];

export type RootCauseAnalysisToolMessage =
  | EndProcessToolMessage
  | InvestigateEntityToolMessage
  | ObservationToolMessage;

export interface RootCauseAnalysisContext {
  initialContext: string;
  start: number;
  end: number;
  events: RootCauseAnalysisEvent[];
  indices: {
    logs: string[];
    traces: string[];
    sloSummaries: string[];
  };
  inferenceClient: InferenceClient;
  tokenLimit: number;
  connectorId: string;
  esClient: ObservabilityElasticsearchClient;
  alertsClient: AlertsClient;
  rulesClient: RulesClient;
  logger: Logger;
  spaceId: string;
  observabilityAIAssistantClient: ObservabilityAIAssistantClient;
}
@@ -0,0 +1,177 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import {
  Message,
  ToolDefinition,
  ToolChoice,
  ToolCallsOf,
  withoutChunkEvents,
  withoutTokenCountEvents,
  ToolMessage,
  MessageOf,
  MessageRole,
} from '@kbn/inference-common';
import { InferenceClient } from '@kbn/inference-plugin/server';
import { Logger } from '@kbn/logging';
import {
  defer,
  last,
  merge,
  Observable,
  of,
  OperatorFunction,
  share,
  switchMap,
  toArray,
} from 'rxjs';

interface CallToolOptions extends CallToolTools {
  system: string;
  messages: Message[];
  inferenceClient: InferenceClient;
  connectorId: string;
  logger: Logger;
}

interface CallToolTools {
  tools: Record<string, ToolDefinition>;
  toolChoice?: ToolChoice;
}

type CallbackOf<
  TCallToolTools extends CallToolTools,
  TEmittedMessage extends Message
> = (parameters: {
  messages: Message[];
  toolCalls: ToolCallsOf<TCallToolTools>['toolCalls'];
}) => Observable<TEmittedMessage>;

type GetNextRequestCallback<TCallToolTools extends CallToolTools> = ({
  messages,
  system,
}: {
  messages: Message[];
  system: string;
}) => { system: string; messages: Message[] } & TCallToolTools;

export function callTools<TCallToolOptions extends CallToolOptions>(
  { system, messages, inferenceClient, connectorId, tools, toolChoice, logger }: TCallToolOptions,
  callback: CallbackOf<TCallToolOptions, ToolMessage>
): Observable<MessageOf<TCallToolOptions>>;

export function callTools<
  TCallToolOptions extends Omit<CallToolOptions, 'tools' | 'toolChoice'> = never,
  TCallToolTools extends CallToolTools = never,
  TEmittedMessage extends Message = never
>(
  options: TCallToolOptions,
  getNextRequest: GetNextRequestCallback<TCallToolTools>,
  callback: CallbackOf<TCallToolTools, TEmittedMessage>
): Observable<TEmittedMessage>;

export function callTools(
  { system, messages, inferenceClient, connectorId, tools, toolChoice, logger }: CallToolOptions,
  ...callbacks:
    | [GetNextRequestCallback<CallToolTools>, CallbackOf<CallToolOptions, ToolMessage>]
    | [CallbackOf<CallToolTools, ToolMessage>]
): Observable<Message> {
  const callback = callbacks.length === 2 ? callbacks[1] : callbacks[0];

  const getNextRequest =
    callbacks.length === 2
      ? callbacks[0]
      : (next: { messages: Message[]; system: string }) => {
          return {
            ...next,
            tools,
            toolChoice,
          };
        };

  const nextRequest = getNextRequest({ system, messages });

  const chatComplete$ = defer(() =>
    inferenceClient.chatComplete({
      connectorId,
      stream: true,
      ...nextRequest,
    })
  );

  const asCompletedMessages$ = chatComplete$.pipe(
    withoutChunkEvents(),
    withoutTokenCountEvents(),
    switchMap((event) => {
      return of({
        role: MessageRole.Assistant as const,
        content: event.content,
        toolCalls: event.toolCalls,
      });
    })
  );

  const withToolResponses$ = asCompletedMessages$
    .pipe(
      switchMap((message) => {
        if (message.toolCalls.length) {
          return merge(
            of(message),
            callback({ toolCalls: message.toolCalls, messages: messages.concat(message) })
          );
        }
        return of(message);
      })
    )
    .pipe(handleNext());

  return withToolResponses$;

  function handleNext(): OperatorFunction<Message, Message> {
    return (source$) => {
      const shared$ = source$.pipe(share());

      const next$ = merge(
        shared$,
        shared$.pipe(
          toArray(),
          last(),
          switchMap((nextMessages) => {
            logger.debug(() =>
              JSON.stringify(
                nextMessages.map((message) => {
                  return {
                    role: message.role,
                    toolCalls: 'toolCalls' in message ? message.toolCalls : undefined,
                    toolCallId: 'toolCallId' in message ? message.toolCallId : undefined,
                  };
                })
              )
            );

            if (nextMessages[nextMessages.length - 1].role !== MessageRole.Assistant) {
              const options: CallToolOptions = {
                system,
                connectorId,
                inferenceClient,
                messages: messages.concat(nextMessages),
                tools,
                toolChoice,
                logger,
              };
              const after$ = callTools(options, getNextRequest, callback);
              return after$;
            }
            return of();
          })
        )
      );

      return next$;
    };
  }
}
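A sketch of driving one tool loop (not part of the diff): the handler answers every tool call, and `callTools` recurses until the model produces an assistant message without tool calls. The exact tool-message shape follows my reading of `@kbn/inference-common` and the placeholder response is an assumption:

```ts
import { from, map } from 'rxjs';
import { MessageRole } from '@kbn/inference-common';

// `system`, `messages`, `inferenceClient`, `connectorId` and `logger` are
// assumed to be in scope; RCA_TOOLS is imported from './tools'.
const messages$ = callTools(
  { system, messages, inferenceClient, connectorId, tools: RCA_TOOLS, logger },
  ({ toolCalls }) =>
    from(toolCalls).pipe(
      map((toolCall) => ({
        role: MessageRole.Tool as const,
        toolCallId: toolCall.toolCallId,
        name: toolCall.function.name,
        response: { acknowledged: true }, // placeholder tool result
      }))
    )
);
```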
@@ -0,0 +1,97 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { encode } from 'gpt-tokenizer';
import { uniqueId } from 'lodash';

interface TextWithId {
  id: string;
  text: string;
}

interface Parameters {
  system: string;
  input: string;
  tokenLimit: number;
}

interface ChunkedOutputRequest {
  input: string;
  system: string;
}

export function chunkOutputCalls({}: Parameters & { texts: string[] }): Array<
  ChunkedOutputRequest & {
    texts: string[];
  }
>;

export function chunkOutputCalls({}: Parameters & { texts: TextWithId[] }): Array<
  ChunkedOutputRequest & {
    texts: TextWithId[];
  }
>;

export function chunkOutputCalls({
  system,
  input,
  texts,
  tokenLimit,
}: Parameters & {
  texts: string[] | TextWithId[];
}) {
  const inputAndSystemPromptCount = encode(system).length + encode(input).length;

  if (!texts.length) {
    return [{ system, input, texts: [] }];
  }

  const textWithIds = texts.map((text) => {
    if (typeof text === 'string') {
      return {
        id: uniqueId(),
        text,
      };
    }
    return text;
  });

  const textsWithCount = textWithIds.map(({ text, id }) => ({
    tokenCount: encode(text).length,
    text,
    id,
  }));

  const chunks: Array<{ tokenCount: number; texts: TextWithId[] }> = [];

  textsWithCount.forEach(({ text, id, tokenCount }) => {
    // First-fit packing: reuse an existing chunk with room for this text.
    let chunkWithRoomLeft = chunks.find((chunk) => {
      return chunk.tokenCount + tokenCount <= tokenLimit;
    });

    if (!chunkWithRoomLeft) {
      // Every chunk repeats the system + input prompt, so a new chunk starts
      // with that overhead already counted.
      chunkWithRoomLeft = { texts: [], tokenCount: inputAndSystemPromptCount };
      chunks.push(chunkWithRoomLeft);
    }
    chunkWithRoomLeft.texts.push({ text, id });
    chunkWithRoomLeft.tokenCount += tokenCount;
  });

  const hasTextWithIds = texts.some((text) => typeof text !== 'string');

  return chunks.map((chunk) => {
    const textsForChunk = hasTextWithIds
      ? chunk.texts
      : chunk.texts.map((text) => (typeof text === 'string' ? text : text.text));

    return {
      system,
      input,
      texts: textsForChunk,
    };
  });
}
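A usage sketch (not part of the diff), assuming a `docs` array of `{ id, text }` objects and an arbitrary 32k token limit:

```ts
// Split one scoring task into as many requests as the token budget requires.
const requests = chunkOutputCalls({
  system: RCA_SYSTEM_PROMPT_BASE,
  input: 'Score the attached documents',
  texts: docs.map((doc) => ({ id: doc.id, text: doc.text })),
  tokenLimit: 32_000,
});

// Each request repeats the same system/input pair with a subset of texts
// that fits the budget; results can be merged by id afterwards.
for (const request of requests) {
  // ...call inferenceClient.output once per request
}
```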
@@ -0,0 +1,12 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

export function formatEntity(entity: Record<string, string>) {
  return Object.entries(entity)
    .map(([field, value]) => `${field}:${value}`)
    .join('/');
}
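For example:

```ts
formatEntity({ 'service.name': 'opbeans-java', 'service.environment': 'production' });
// => 'service.name:opbeans-java/service.environment:production'
```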
@ -0,0 +1,22 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import { MessageRole } from '@kbn/inference-common';
|
||||
import { RCA_INVESTIGATE_ENTITY_TOOL_NAME } from '@kbn/observability-ai-common/root_cause_analysis';
|
||||
import { InvestigateEntityToolMessage, RootCauseAnalysisContext } from '../types';
|
||||
|
||||
export function getPreviouslyInvestigatedEntities({
|
||||
events,
|
||||
}: Pick<RootCauseAnalysisContext, 'events'>) {
|
||||
const investigationToolResponses = events.filter(
|
||||
(event): event is InvestigateEntityToolMessage => {
|
||||
return event.role === MessageRole.Tool && event.name === RCA_INVESTIGATE_ENTITY_TOOL_NAME;
|
||||
}
|
||||
);
|
||||
|
||||
return investigationToolResponses.map((event) => event.response.entity);
|
||||
}

@@ -0,0 +1,34 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { ScoredKnowledgeBaseEntry } from '../tasks/get_knowledge_base_entries';
import { toBlockquote } from './to_blockquote';

export function serializeKnowledgeBaseEntries(entries: ScoredKnowledgeBaseEntry[]) {
  if (!entries.length) {
    return `## Knowledge base

No relevant knowledge base entries were found.
`;
  }

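  // Entries scored below 3 are treated as not relevant enough to include.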
  const serializedEntries = entries
    .filter((entry) => entry.score >= 3)
    .map(
      (entry) => `## Entry \`${entry.id}\` (score: ${entry.score}, ${
        entry.truncated ? `truncated` : `not truncated`
      })

${toBlockquote(entry.text)}`
    );

  return `## Knowledge base

The following relevant entries were found in the knowledge base

${serializedEntries.join('\n\n')}`;
}

@@ -0,0 +1,47 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { compact } from 'lodash';
import { MessageRole } from '@kbn/inference-common';
import { RCA_OBSERVE_TOOL_NAME } from '@kbn/observability-ai-common/root_cause_analysis';
import { formatEntity } from './format_entity';
import { toBlockquote } from './to_blockquote';
import { ObservationToolMessage, RootCauseAnalysisContext } from '../types';

export function stringifySummaries({ events }: RootCauseAnalysisContext): string {
  const summaries = events
    .filter((event): event is ObservationToolMessage => {
      return event.role === MessageRole.Tool && event.name === RCA_OBSERVE_TOOL_NAME;
    })
    .map((event) => event.data);

  if (!summaries.length) {
    return `# Previous observations

No previous observations`;
  }

  return `# Previous observations

${summaries.map((summary, index) => {
  const header = `## Observation #${index + 1}`;

  const entitiesHeader = summary.investigations.length
    ? `### Investigated entities

${summary.investigations
  .map((investigation) => `- ${formatEntity(investigation.entity)}`)
  .join('\n')}`
    : undefined;

  const summaryBody = `### Summary

${toBlockquote(summary.content)}`;

  return compact([header, entitiesHeader, summaryBody]).join('\n\n');
}).join('\n\n')}`;
}

@@ -0,0 +1,13 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

export function toBlockquote(input: string): string {
  return input
    .split('\n')
    .map((line) => `> ${line}`)
    .join('\n');
}

@@ -0,0 +1,124 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { MessageRole, ToolCallsOf } from '@kbn/inference-common';
import { entityQuery } from '@kbn/observability-utils-common/es/queries/entity_query';
import { RCA_INVESTIGATE_ENTITY_TOOL_NAME } from '@kbn/observability-ai-common/root_cause_analysis';
import { isEqual } from 'lodash';
import { getEntitiesByFuzzySearch } from '@kbn/observability-utils-server/entities/get_entities_by_fuzzy_search';
import { RCA_TOOLS } from '../tools';
import {
  InvestigateEntityToolMessage,
  RootCauseAnalysisContext,
  RootCauseAnalysisToolRequest,
} from '../types';
import { formatEntity } from './format_entity';

interface EntityExistsResultExists {
  exists: true;
  entity: Record<string, string>;
}

interface EntityExistsResultDoesNotExist {
  exists: false;
  entity: Record<string, string>;
  suggestions: string[];
}

type EntityExistsResult = EntityExistsResultExists | EntityExistsResultDoesNotExist;

export async function validateInvestigateEntityToolCalls({
  rcaContext,
  toolCalls,
}: {
  rcaContext: Pick<RootCauseAnalysisContext, 'esClient' | 'indices' | 'start' | 'end' | 'events'>;
  toolCalls: RootCauseAnalysisToolRequest[];
}) {
  const { events, esClient, indices, start, end } = rcaContext;

  const previouslyInvestigatedEntities = events
    .filter(
      (event): event is InvestigateEntityToolMessage =>
        event.role === MessageRole.Tool && event.name === RCA_INVESTIGATE_ENTITY_TOOL_NAME
    )
    .map((toolResponse) => toolResponse.response.entity);

  const investigateEntityToolCalls = toolCalls.filter(
    (
      toolCall
    ): toolCall is ToolCallsOf<{
      tools: Pick<typeof RCA_TOOLS, typeof RCA_INVESTIGATE_ENTITY_TOOL_NAME>;
    }>['toolCalls'][number] => toolCall.function.name === RCA_INVESTIGATE_ENTITY_TOOL_NAME
  );

  if (!investigateEntityToolCalls.length) {
    return [];
  }

  const entitiesToInvestigate = investigateEntityToolCalls.map((toolCall) => {
    const { entity: entityToInvestigate } = toolCall.function.arguments;
    return {
      [entityToInvestigate.field]: entityToInvestigate.value,
    };
  });
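  // Cheap existence probe: size 0 and track_total_hits: 1 mean we only need to
  // know whether at least one document matches the entity.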
  const entityExistsResponses: EntityExistsResult[] = await Promise.all(
    entitiesToInvestigate.map(async (entity) => {
      const response = await esClient.search('find_data_for_entity', {
        track_total_hits: 1,
        size: 0,
        timeout: '1ms',
        index: indices.logs.concat(indices.traces),
        query: {
          bool: {
            filter: [...entityQuery(entity)],
          },
        },
      });

      const exists = response.hits.total.value > 0;
      if (!exists) {
        return getEntitiesByFuzzySearch({
          start,
          end,
          esClient,
          index: indices.logs.concat(indices.traces),
          entity,
        }).then((suggestions) => {
          return {
            entity,
            exists,
            suggestions,
          };
        });
      }

      return { entity, exists };
    })
  );

  const alreadyInvestigatedEntities = entitiesToInvestigate.filter((entity) => {
    return previouslyInvestigatedEntities.some((prevEntity) => isEqual(entity, prevEntity));
  });

  const errors = [
    ...entityExistsResponses
      .filter(
        (entityExistsResult): entityExistsResult is EntityExistsResultDoesNotExist =>
          !entityExistsResult.exists
      )
      .map(({ suggestions, entity }) => {
        return `Entity ${formatEntity(
          entity
        )} does not exist. Did you mean one of ${suggestions.join(', ')}?`;
      }),
    ...alreadyInvestigatedEntities.map((entity) => {
      return `Entity ${formatEntity(entity)} was already investigated before.`;
    }),
  ];

  return errors;
}

@@ -0,0 +1,29 @@
{
  "extends": "../../../../../tsconfig.base.json",
  "compilerOptions": {
    "outDir": "target/types",
    "types": [
      "jest",
      "node",
      "react"
    ]
  },
  "include": [
    "**/*.ts"
  ],
  "exclude": [
    "target/**/*"
  ],
  "kbn_references": [
    "@kbn/observability-utils-common",
    "@kbn/alerting-plugin",
    "@kbn/rule-registry-plugin",
    "@kbn/inference-plugin",
    "@kbn/logging",
    "@kbn/calculate-auto",
    "@kbn/observability-ai-assistant-plugin",
    "@kbn/inference-common",
    "@kbn/observability-ai-common",
    "@kbn/observability-utils-server"
  ]
}

@@ -0,0 +1,50 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { castArray, orderBy } from 'lodash';
import Fuse from 'fuse.js';
import { ObservabilityElasticsearchClient } from '../es/client/create_observability_es_client';

export async function getEntitiesByFuzzySearch({
  esClient,
  entity,
  start,
  end,
  index,
}: {
  esClient: ObservabilityElasticsearchClient;
  entity: Record<string, string>;
  start: number;
  end: number;
  index: string | string[];
}): Promise<string[]> {
  if (Object.keys(entity).length > 1) {
    return [];
  }

  const [field, value] = Object.entries(entity)[0];

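  // Enumerate candidate values for the field within the time range, then
  // fuzzy-match them against the requested value client-side with Fuse.js.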
  const { terms } = await esClient.client.termsEnum({
    index: castArray(index).join(','),
    field,
    index_filter: {
      range: {
        '@timestamp': {
          gte: new Date(start).toISOString(),
          lte: new Date(end).toISOString(),
        },
      },
    },
    size: 10_000,
  });

  const results = new Fuse(terms, { includeScore: true, threshold: 0.75 }).search(value);

  return orderBy(results, (result) => result.score, 'asc')
    .slice(0, 5)
    .map((result) => result.item);
}

@@ -0,0 +1,405 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import {
  AggregationsCategorizeTextAggregation,
  AggregationsDateHistogramAggregation,
  AggregationsMaxAggregation,
  AggregationsMinAggregation,
  AggregationsTopHitsAggregation,
  QueryDslQueryContainer,
} from '@elastic/elasticsearch/lib/api/types';
import { categorizationAnalyzer } from '@kbn/aiops-log-pattern-analysis/categorization_analyzer';
import { ChangePointType } from '@kbn/es-types/src';
import { pValueToLabel } from '@kbn/observability-utils-common/ml/p_value_to_label';
import { calculateAuto } from '@kbn/calculate-auto';
import { omit, orderBy, uniqBy } from 'lodash';
import moment from 'moment';
import { ObservabilityElasticsearchClient } from '../es/client/create_observability_es_client';
import { kqlQuery } from '../es/queries/kql_query';
import { rangeQuery } from '../es/queries/range_query';

interface FieldPatternResultBase {
  field: string;
  count: number;
  pattern: string;
  regex: string;
  sample: string;
  firstOccurrence: string;
  lastOccurrence: string;
  highlight: Record<string, string[]>;
  metadata: Record<string, unknown[]>;
}

interface FieldPatternResultChanges {
  timeseries: Array<{ x: number; y: number }>;
  change: {
    timestamp?: string;
    significance: 'high' | 'medium' | 'low' | null;
    type: ChangePointType;
    change_point?: number;
    p_value?: number;
  };
}

export type FieldPatternResult<TChanges extends boolean | undefined = undefined> =
  FieldPatternResultBase & (TChanges extends true ? FieldPatternResultChanges : {});

export type FieldPatternResultWithChanges = FieldPatternResult<true>;

interface CategorizeTextOptions {
  query: QueryDslQueryContainer;
  metadata: string[];
  esClient: ObservabilityElasticsearchClient;
  samplingProbability: number;
  fields: string[];
  index: string | string[];
  useMlStandardTokenizer: boolean;
  size: number;
  start: number;
  end: number;
}
// eslint-disable-next-line @typescript-eslint/consistent-type-definitions
type CategorizeTextSubAggregations = {
  sample: { top_hits: AggregationsTopHitsAggregation };
  minTimestamp: { min: AggregationsMinAggregation };
  maxTimestamp: { max: AggregationsMaxAggregation };
};

interface CategorizeTextAggregationResult {
  categorize_text: AggregationsCategorizeTextAggregation;
  aggs: CategorizeTextSubAggregations &
    (
      | {}
      | {
          timeseries: { date_histogram: AggregationsDateHistogramAggregation };
          changes: { change_point: { buckets_path: string } };
        }
    );
}

export async function runCategorizeTextAggregation<
  TChanges extends boolean | undefined = undefined
>(
  options: CategorizeTextOptions & { includeChanges?: TChanges }
): Promise<Array<FieldPatternResult<TChanges>>>;

export async function runCategorizeTextAggregation({
  esClient,
  fields,
  metadata,
  index,
  query,
  samplingProbability,
  useMlStandardTokenizer,
  includeChanges,
  size,
  start,
  end,
}: CategorizeTextOptions & { includeChanges?: boolean }): Promise<
  Array<FieldPatternResult<boolean>>
> {
  const aggs = Object.fromEntries(
    fields.map((field): [string, CategorizeTextAggregationResult] => [
      field,
      {
        categorize_text: {
          field,
          min_doc_count: 1,
          size,
          categorization_analyzer: useMlStandardTokenizer
            ? {
                tokenizer: 'ml_standard',
                char_filter: [
                  {
                    type: 'pattern_replace',
                    pattern: '\\\\n',
                    replacement: '',
                  } as unknown as string,
                ],
              }
            : categorizationAnalyzer,
        },
        aggs: {
          minTimestamp: {
            min: {
              field: '@timestamp',
            },
          },
          maxTimestamp: {
            max: {
              field: '@timestamp',
            },
          },
          ...(includeChanges
            ? {
                timeseries: {
                  date_histogram: {
                    field: '@timestamp',
                    min_doc_count: 0,
                    extended_bounds: {
                      min: start,
                      max: end,
                    },
                    fixed_interval: `${calculateAuto
                      .atLeast(30, moment.duration(end - start, 'ms'))!
                      .asMilliseconds()}ms`,
                  },
                },
                changes: {
                  change_point: {
                    buckets_path: 'timeseries>_count',
                  },
                },
              }
            : {}),
          sample: {
            top_hits: {
              size: 1,
              _source: false,
              fields: [field, ...metadata],
              sort: {
                _score: {
                  order: 'desc',
                },
              },
              highlight: {
                fields: {
                  '*': {},
                },
              },
            },
          },
        },
      },
    ])
  );

  const response = await esClient.search('get_log_patterns', {
    index,
    size: 0,
    track_total_hits: false,
    query: {
      bool: {
        filter: [query, ...rangeQuery(start, end)],
      },
    },
    aggregations: {
      sampler: {
        random_sampler: {
          probability: samplingProbability,
        },
        aggs,
      },
    },
  });

  if (!response.aggregations) {
    return [];
  }

  const fieldAggregates = omit(response.aggregations.sampler, 'seed', 'doc_count', 'probability');

  return Object.entries(fieldAggregates).flatMap(([fieldName, aggregate]) => {
    const buckets = aggregate.buckets;

    return buckets.map((bucket) => {
      return {
        field: fieldName,
        count: bucket.doc_count,
        pattern: bucket.key,
        regex: bucket.regex,
        sample: bucket.sample.hits.hits[0].fields![fieldName][0] as string,
        highlight: bucket.sample.hits.hits[0].highlight ?? {},
        metadata: bucket.sample.hits.hits[0].fields!,
        firstOccurrence: new Date(bucket.minTimestamp.value!).toISOString(),
        lastOccurrence: new Date(bucket.maxTimestamp.value!).toISOString(),
        ...('timeseries' in bucket
          ? {
              timeseries: bucket.timeseries.buckets.map((dateBucket) => ({
                x: dateBucket.key,
                y: dateBucket.doc_count,
              })),
              change: Object.entries(bucket.changes.type).map(
                ([changePointType, change]): FieldPatternResultChanges['change'] => {
                  return {
                    type: changePointType as ChangePointType,
                    significance:
                      change.p_value !== undefined ? pValueToLabel(change.p_value) : null,
                    change_point: change.change_point,
                    p_value: change.p_value,
                    timestamp:
                      change.change_point !== undefined
                        ? bucket.timeseries.buckets[change.change_point].key_as_string
                        : undefined,
                  };
                }
              )[0],
            }
          : {}),
      };
    });
  });
}

interface LogPatternOptions {
  esClient: ObservabilityElasticsearchClient;
  start: number;
  end: number;
  index: string | string[];
  kuery: string;
  metadata?: string[];
  fields: string[];
}

export async function getLogPatterns<TChanges extends boolean | undefined = undefined>(
  options: LogPatternOptions & { includeChanges?: TChanges }
): Promise<Array<FieldPatternResult<TChanges>>>;

export async function getLogPatterns({
  esClient,
  start,
  end,
  index,
  kuery,
  includeChanges,
  metadata = [],
  fields,
}: LogPatternOptions & { includeChanges?: boolean }): Promise<Array<FieldPatternResult<boolean>>> {
  const fieldCapsResponse = await esClient.fieldCaps('get_field_caps_for_log_pattern_analysis', {
    fields,
    index_filter: {
      bool: {
        filter: [...rangeQuery(start, end)],
      },
    },
    index,
    types: ['text', 'match_only_text'],
  });

  const fieldsInFieldCaps = Object.keys(fieldCapsResponse.fields);

  if (!fieldsInFieldCaps.length) {
    return [];
  }

  const totalDocsResponse = await esClient.search('get_total_docs_for_log_pattern_analysis', {
    index,
    size: 0,
    track_total_hits: true,
    query: {
      bool: {
        filter: [...kqlQuery(kuery), ...rangeQuery(start, end)],
      },
    },
  });

  const totalHits = totalDocsResponse.hits.total.value;

  if (totalHits === 0) {
    return [];
  }

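  // Aim for a random sample of roughly 100k documents; at probabilities of
  // 0.5 and above, random sampling is not worth the overhead, so read everything.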
  let samplingProbability = 100_000 / totalHits;

  if (samplingProbability >= 0.5) {
    samplingProbability = 1;
  }

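  // When change detection is on, analyze each field separately so every field
  // gets its own time series; otherwise a single query covers all fields.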
  const fieldGroups = includeChanges
    ? fieldsInFieldCaps.map((field) => [field])
    : [fieldsInFieldCaps];

  const allPatterns = await Promise.all(
    fieldGroups.map(async (fieldGroup) => {
      const topMessagePatterns = await runCategorizeTextAggregation({
        esClient,
        index,
        fields: fieldGroup,
        query: {
          bool: {
            filter: kqlQuery(kuery),
          },
        },
        samplingProbability,
        useMlStandardTokenizer: false,
        size: 100,
        start,
        end,
        includeChanges,
        metadata,
      });

      if (topMessagePatterns.length === 0) {
        return [];
      }

      const patternsToExclude = topMessagePatterns.filter((pattern) => {
        // Elasticsearch will barf if the exclusion query gets too complex. The
        // number of capture groups in the regex serves as a proxy for complexity.
        const complexity = pattern.regex.match(/(\.\+\?)|(\.\*\?)/g)?.length ?? 0;
        return (
          complexity <= 25 &&
          // anything with fewer than 50 messages should be re-processed with the ml_standard tokenizer
          pattern.count > 50
        );
      });

      const rareMessagePatterns = await runCategorizeTextAggregation({
        esClient,
        index,
        fields: fieldGroup,
        start,
        end,
        query: {
          bool: {
            filter: kqlQuery(kuery),
            must_not: [
              ...patternsToExclude.map((pattern) => {
                return {
                  bool: {
                    filter: [
                      {
                        regexp: {
                          [pattern.field]: {
                            value: pattern.regex,
                          },
                        },
                      },
                      {
                        match: {
                          [pattern.field]: {
                            query: pattern.pattern,
                            fuzziness: 0,
                            operator: 'and' as const,
                            auto_generate_synonyms_phrase_query: false,
                          },
                        },
                      },
                    ],
                  },
                };
              }),
            ],
          },
        },
        size: 1000,
        includeChanges,
        samplingProbability: 1,
        useMlStandardTokenizer: true,
        metadata,
      });

      return [...patternsToExclude, ...rareMessagePatterns];
    })
  );

  return uniqBy(
    orderBy(allPatterns.flat(), (pattern) => pattern.count, 'desc'),
    (pattern) => pattern.sample
  );
}

@@ -24,6 +24,8 @@
    "@kbn/alerting-plugin",
    "@kbn/rule-registry-plugin",
    "@kbn/rule-data-utils",
    "@kbn/aiops-log-pattern-analysis",
    "@kbn/calculate-auto",
    "@kbn/utility-types",
    "@kbn/task-manager-plugin",
  ]

@@ -13,6 +13,9 @@ export {
  type AssistantMessage,
  type ToolMessage,
  type UserMessage,
  type MessageOf,
  type AssistantMessageOf,
  type ToolMessageOf,
  type ToolSchemaType,
  type FromToolSchema,
  type ToolSchema,

@@ -33,6 +33,9 @@ export {
  type AssistantMessage,
  type UserMessage,
  type ToolMessage,
  type AssistantMessageOf,
  type MessageOf,
  type ToolMessageOf,
} from './messages';
export { type ToolSchema, type ToolSchemaType, type FromToolSchema } from './tool_schema';
export {

@@ -5,7 +5,7 @@
 * 2.0.
 */

import type { ToolCall } from './tools';
import type { ToolCall, ToolCallsOf, ToolNamesOf, ToolOptions, ToolResponsesOf } from './tools';

/**
 * Enum for all possible {@link Message} roles.
@@ -52,17 +52,32 @@ export type AssistantMessage = MessageBase<MessageRole.Assistant> & {
/**
 * Represents a tool invocation result, following a request from the LLM to execute a tool.
 */
export type ToolMessage<TToolResponse extends Record<string, any> | unknown> =
  MessageBase<MessageRole.Tool> & {
    /**
     * The call id matching the {@link ToolCall} this tool message is for.
     */
    toolCallId: string;
    /**
     * The response from the tool invocation.
     */
    response: TToolResponse;
  };
export type ToolMessage<
  TName extends string = string,
  TToolResponse extends Record<string, any> | unknown = Record<string, any> | unknown,
  TToolData extends Record<string, any> | undefined = Record<string, any> | undefined
> = MessageBase<MessageRole.Tool> & {
  /*
   * The name of the tool called. Used for refining the type of the response.
   */
  name: TName;
  /**
   * The call id matching the {@link ToolCall} this tool message is for.
   */
  toolCallId: string;
  /**
   * The response from the tool invocation.
   */
  response: TToolResponse;
} & (TToolData extends undefined
    ? {}
    : {
        /**
         * Additional data from the tool invocation, that is not sent to the LLM
         * but can be used to attach baggage (such as timeseries or debug data)
         */
        data: TToolData;
      });

/**
 * Mixin composed of all the possible types of messages in a chatComplete discussion.
@@ -72,4 +87,30 @@ export type ToolMessage<TToolResponse extends Record<string, any> | unknown> =
 * - {@link AssistantMessage}
 * - {@link ToolMessage}
 */
export type Message = UserMessage | AssistantMessage | ToolMessage<unknown>;
export type Message = UserMessage | AssistantMessage | ToolMessage;

/**
 * Utility type to get the Assistant message type of a {@link ToolOptions} type.
 */
export type AssistantMessageOf<TToolOptions extends ToolOptions> = Omit<
  AssistantMessage,
  'toolCalls'
> &
  ToolCallsOf<TToolOptions>;

/**
 * Utility type to get the Tool message type of a {@link ToolOptions} type.
 */

export type ToolMessageOf<TToolOptions extends ToolOptions> = ToolMessage<
  ToolNamesOf<TToolOptions>,
  ToolResponsesOf<TToolOptions['tools']>
>;

/**
 * Utility type to get the mixin Message type of a {@link ToolOptions} type.
 */
export type MessageOf<TToolOptions extends ToolOptions> =
  | UserMessage
  | AssistantMessageOf<TToolOptions>
  | ToolMessageOf<TToolOptions>;

@@ -8,24 +8,24 @@
import type { ValuesType } from 'utility-types';
import { FromToolSchema, ToolSchema } from './tool_schema';

type Assert<TValue, TType> = TValue extends TType ? TValue & TType : never;

type ToolsOfChoice<TToolOptions extends ToolOptions> = TToolOptions['toolChoice'] extends {
  function: infer TToolName;
}
  ? TToolName extends keyof TToolOptions['tools']
    ? Pick<TToolOptions['tools'], TToolName>
  ? TToolName extends string
    ? Pick<TToolOptions['tools'], TToolName>
    : TToolOptions['tools']
    : TToolOptions['tools']
  : TToolOptions['tools'];

/**
 * Utility type to infer the tool calls response shape.
 */
type ToolResponsesOf<TTools extends Record<string, ToolDefinition> | undefined> =
export type ToolResponsesOf<TTools extends Record<string, ToolDefinition> | undefined> =
  TTools extends Record<string, ToolDefinition>
    ? Array<
        ValuesType<{
          [TName in keyof TTools]: ToolResponseOf<Assert<TName, string>, TTools[TName]>;
          [TName in keyof TTools & string]: ToolCall<TName, ToolResponseOf<TTools[TName]>>;
        }>
      >
    : never[];
@@ -33,10 +33,11 @@ type ToolResponsesOf<TTools extends Record<string, ToolDefinition> | undefined>
/**
 * Utility type to infer the tool call response shape.
 */
type ToolResponseOf<TName extends string, TToolDefinition extends ToolDefinition> = ToolCall<
  TName,
  TToolDefinition extends { schema: ToolSchema } ? FromToolSchema<TToolDefinition['schema']> : {}
>;
export type ToolResponseOf<TToolDefinition extends ToolDefinition> = TToolDefinition extends {
  schema: ToolSchema;
}
  ? FromToolSchema<TToolDefinition['schema']>
  : {};

/**
 * Tool invocation choice type.
@@ -129,6 +130,10 @@ export interface ToolCall<
  name: TName;
} & (TArguments extends Record<string, any> ? { arguments: TArguments } : {});
}
/**
 * Utility type to get the tool names of ToolOptions
 */
export type ToolNamesOf<TToolOptions extends ToolOptions> = keyof TToolOptions['tools'] & string;

/**
 * Tool-related parameters of {@link ChatCompleteAPI}
@@ -96,6 +96,17 @@ export interface OutputOptions<
   * Defaults to false.
   */
  stream?: TStream;

  /**
   * Optional configuration for retrying the call if an error occurs.
   */
  retry?: {
    /**
     * Whether to retry on validation errors. Can be a number of retries,
     * or a boolean, which means one retry.
     */
    onValidationError?: boolean | number;
  };
}

/**
@@ -3,4 +3,4 @@
  "private": true,
  "version": "1.0.0",
  "license": "Elastic License 2.0"
}
}

@@ -0,0 +1,72 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import type { AggregationsCustomCategorizeTextAnalyzer } from '@elastic/elasticsearch/lib/api/types';

// This is a copy of the default categorization analyzer, but using the 'standard' tokenizer rather than the 'ml_standard' tokenizer.
// The 'ml_standard' tokenizer splits tokens in a way that was observed to give better categories in testing many years ago; however,
// the downside of these better categories is a potential failure to match the original documents when creating a filter for Discover.
// A future enhancement would be to check which analyzer is specified in the mappings for the source field and to use
// that instead of unconditionally using 'standard'.
// For an initial fix, however, using the standard analyzer is more likely to match the results from the majority of searches.
export const categorizationAnalyzer: AggregationsCustomCategorizeTextAnalyzer = {
  char_filter: ['first_line_with_letters'],
  tokenizer: 'standard',
  filter: [
    // @ts-expect-error filter type in AggregationsCustomCategorizeTextAnalyzer is incorrect
    {
      type: 'stop',
      stopwords: [
        'Monday',
        'Tuesday',
        'Wednesday',
        'Thursday',
        'Friday',
        'Saturday',
        'Sunday',
        'Mon',
        'Tue',
        'Wed',
        'Thu',
        'Fri',
        'Sat',
        'Sun',
        'January',
        'February',
        'March',
        'April',
        'May',
        'June',
        'July',
        'August',
        'September',
        'October',
        'November',
        'December',
        'Jan',
        'Feb',
        'Mar',
        'Apr',
        'May',
        'Jun',
        'Jul',
        'Aug',
        'Sep',
        'Oct',
        'Nov',
        'Dec',
        'GMT',
        'UTC',
      ],
    },
    // @ts-expect-error filter type in AggregationsCustomCategorizeTextAnalyzer is incorrect
    {
      type: 'limit',
      max_token_count: '100',
    },
  ],
};

@@ -5,16 +5,14 @@
 * 2.0.
 */

import type {
  QueryDslQueryContainer,
  AggregationsCustomCategorizeTextAnalyzer,
} from '@elastic/elasticsearch/lib/api/types';
import type { QueryDslQueryContainer } from '@elastic/elasticsearch/lib/api/types';
import type { MappingRuntimeFields } from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import { isPopulatedObject } from '@kbn/ml-is-populated-object/src/is_populated_object';

import type { createRandomSamplerWrapper } from '@kbn/ml-random-sampler-utils';

import { createDefaultQuery } from '@kbn/aiops-common/create_default_query';
import { categorizationAnalyzer } from './categorization_analyzer';

const CATEGORY_LIMIT = 1000;
const EXAMPLE_LIMIT = 4;
@@ -121,67 +119,3 @@ export function createCategoryRequest(
    },
  };
}

// This is a copy of the default categorization analyzer but using the 'standard' tokenizer rather than the 'ml_standard' tokenizer.
// The 'ml_standard' tokenizer splits tokens in a way that was observed to give better categories in testing many years ago, however,
// the downside of these better categories is then a potential failure to match the original documents when creating a filter for Discover.
// A future enhancement would be to check which analyzer is specified in the mappings for the source field and to use
// that instead of unconditionally using 'standard'.
// However for an initial fix, using the standard analyzer will be more likely to match the results from the majority of searches.
const categorizationAnalyzer: AggregationsCustomCategorizeTextAnalyzer = {
  char_filter: ['first_line_with_letters'],
  tokenizer: 'standard',
  filter: [
    // @ts-expect-error filter type in AggregationsCustomCategorizeTextAnalyzer is incorrect
    {
      type: 'stop',
      stopwords: [
        'Monday',
        'Tuesday',
        'Wednesday',
        'Thursday',
        'Friday',
        'Saturday',
        'Sunday',
        'Mon',
        'Tue',
        'Wed',
        'Thu',
        'Fri',
        'Sat',
        'Sun',
        'January',
        'February',
        'March',
        'April',
        'May',
        'June',
        'July',
        'August',
        'September',
        'October',
        'November',
        'December',
        'Jan',
        'Feb',
        'Mar',
        'Apr',
        'May',
        'Jun',
        'Jul',
        'Aug',
        'Sep',
        'Oct',
        'Nov',
        'Dec',
        'GMT',
        'UTC',
      ],
    },
    // @ts-expect-error filter type in AggregationsCustomCategorizeTextAnalyzer is incorrect
    {
      type: 'limit',
      max_token_count: '100',
    },
  ],
};

@@ -25990,7 +25990,6 @@
    "xpack.investigateApp.addObservationUI.h2.addAnObservationChartLabel": "Ajouter un graphique d'observation",
    "xpack.investigateApp.addObservationUI.p.selectADataSourceLabel": "Sélectionnez une source de données pour générer un graphique d'aperçu",
    "xpack.investigateApp.appTitle": "Investigations",
    "xpack.investigateApp.assistantHypothesis.observabilityAIAssistantContextualInsight.helpMeInvestigateThisLabel": "Aidez-moi à résoudre la cause de cet échec",
    "xpack.investigateApp.defaultChart.error_equation.description": "Vérifiez l'équation.",
    "xpack.investigateApp.defaultChart.error_equation.title": "Une erreur s'est produite lors de l'affichage du graphique",
    "xpack.investigateApp.defaultChart.noData.title": "Aucune donnée graphique disponible",

@@ -25849,7 +25849,6 @@
    "xpack.investigateApp.addObservationUI.h2.addAnObservationChartLabel": "観測グラフを追加",
    "xpack.investigateApp.addObservationUI.p.selectADataSourceLabel": "データソースを選択して、プレビューグラフを生成",
    "xpack.investigateApp.appTitle": "調査",
    "xpack.investigateApp.assistantHypothesis.observabilityAIAssistantContextualInsight.helpMeInvestigateThisLabel": "このエラーの調査を支援",
    "xpack.investigateApp.defaultChart.error_equation.description": "式を確認してください。",
    "xpack.investigateApp.defaultChart.error_equation.title": "グラフの表示中にエラーが発生しました",
    "xpack.investigateApp.defaultChart.noData.title": "グラフデータがありません",

@@ -25412,7 +25412,6 @@
    "xpack.investigateApp.addObservationUI.h2.addAnObservationChartLabel": "添加观察图表",
    "xpack.investigateApp.addObservationUI.p.selectADataSourceLabel": "选择数据源以生成预览图表",
    "xpack.investigateApp.appTitle": "调查",
    "xpack.investigateApp.assistantHypothesis.observabilityAIAssistantContextualInsight.helpMeInvestigateThisLabel": "帮助我调查此故障",
    "xpack.investigateApp.defaultChart.error_equation.description": "检查方程。",
    "xpack.investigateApp.defaultChart.error_equation.title": "渲染图表时出错",
    "xpack.investigateApp.defaultChart.noData.title": "没有可用图表数据",

@@ -12,6 +12,7 @@ import {
  ChatCompletionEventType,
} from '@kbn/inference-common';
import { createOutputApi } from './create_output_api';
import { createToolValidationError } from '../../server/chat_complete/errors';

describe('createOutputApi', () => {
  let chatComplete: jest.Mock;
@@ -119,4 +120,80 @@ describe('createOutputApi', () => {
      },
    ]);
  });

  describe('when using retry', () => {
    const unvalidatedFailedToolCall = {
      function: {
        name: 'myFunction',
        arguments: JSON.stringify({ foo: 'bar' }),
      },
      toolCallId: 'foo',
    };

    const validationError = createToolValidationError('Validation failed', {
      toolCalls: [unvalidatedFailedToolCall],
    });

    it('retries once when onValidationError is a boolean', async () => {
      chatComplete.mockRejectedValueOnce(validationError);
      chatComplete.mockResolvedValueOnce(
        Promise.resolve({ content: 'retried content', toolCalls: [unvalidatedFailedToolCall] })
      );

      const output = createOutputApi(chatComplete);

      const response = await output({
        id: 'retry-id',
        stream: false,
        connectorId: '.retry-connector',
        input: 'input message',
        retry: {
          onValidationError: true,
        },
      });

      expect(chatComplete).toHaveBeenCalledTimes(2);
      expect(response).toEqual({
        id: 'retry-id',
        content: 'retried content',
        output: unvalidatedFailedToolCall.function.arguments,
      });
    });

    it('retries the number of specified attempts', async () => {
      chatComplete.mockRejectedValue(validationError);

      const output = createOutputApi(chatComplete);

      await expect(
        output({
          id: 'retry-id',
          stream: false,
          connectorId: '.retry-connector',
          input: 'input message',
          retry: {
            onValidationError: 2,
          },
        })
      ).rejects.toThrow('Validation failed');

      expect(chatComplete).toHaveBeenCalledTimes(3);
    });

    it('throws an error if retry is provided in streaming mode', () => {
      const output = createOutputApi(chatComplete);

      expect(() =>
        output({
          id: 'stream-retry-id',
          stream: true,
          connectorId: '.stream-retry-connector',
          input: 'input message',
          retry: {
            onValidationError: 1,
          },
        })
      ).toThrowError('Retry options are not supported in streaming mode');
    });
  });
});

@@ -10,17 +10,22 @@ import {
  ChatCompletionEventType,
  MessageRole,
  OutputAPI,
  OutputCompositeResponse,
  OutputEventType,
  OutputOptions,
  ToolSchema,
  isToolValidationError,
  withoutTokenCountEvents,
} from '@kbn/inference-common';
import { isObservable, map } from 'rxjs';
import { ensureMultiTurn } from '../utils/ensure_multi_turn';

type DefaultOutputOptions = OutputOptions<string, ToolSchema | undefined, boolean>;

export function createOutputApi(chatCompleteApi: ChatCompleteAPI): OutputAPI;

export function createOutputApi(chatCompleteApi: ChatCompleteAPI) {
  return ({
  return function callOutputApi({
    id,
    connectorId,
    input,
@@ -29,19 +34,26 @@ export function createOutputApi(chatCompleteApi: ChatCompleteAPI) {
    previousMessages,
    functionCalling,
    stream,
  }: OutputOptions<string, ToolSchema | undefined, boolean>) => {
    retry,
  }: DefaultOutputOptions): OutputCompositeResponse<string, ToolSchema | undefined, boolean> {
    if (stream && retry !== undefined) {
      throw new Error(`Retry options are not supported in streaming mode`);
    }

    const messages = ensureMultiTurn([
      ...(previousMessages || []),
      {
        role: MessageRole.User,
        content: input,
      },
    ]);

    const response = chatCompleteApi({
      connectorId,
      stream,
      functionCalling,
      system,
      messages: ensureMultiTurn([
        ...(previousMessages || []),
        {
          role: MessageRole.User,
          content: input,
        },
      ]),
      messages,
      ...(schema
        ? {
            tools: {
@@ -79,16 +91,55 @@ export function createOutputApi(chatCompleteApi: ChatCompleteAPI) {
        })
      );
    } else {
      return response.then((chatResponse) => {
        return {
          id,
          content: chatResponse.content,
          output:
            chatResponse.toolCalls.length && 'arguments' in chatResponse.toolCalls[0].function
              ? chatResponse.toolCalls[0].function.arguments
              : undefined,
        };
      });
      return response.then(
        (chatResponse) => {
          return {
            id,
            content: chatResponse.content,
            output:
              chatResponse.toolCalls.length && 'arguments' in chatResponse.toolCalls[0].function
                ? chatResponse.toolCalls[0].function.arguments
                : undefined,
          };
        },
|
||||
if (isToolValidationError(error) && retry?.onValidationError) {
|
||||
const retriesLeft =
|
||||
typeof retry.onValidationError === 'number' ? retry.onValidationError : 1;
|
||||
|
||||
return callOutputApi({
|
||||
id,
|
||||
connectorId,
|
||||
input,
|
||||
schema,
|
||||
system,
|
||||
previousMessages: messages.concat(
|
||||
{
|
||||
role: MessageRole.Assistant as const,
|
||||
content: '',
|
||||
toolCalls: error.meta.toolCalls!,
|
||||
},
|
||||
...(error.meta.toolCalls?.map((toolCall) => {
|
||||
return {
|
||||
name: toolCall.function.name,
|
||||
role: MessageRole.Tool as const,
|
||||
toolCallId: toolCall.toolCallId,
|
||||
response: {
|
||||
error: error.meta,
|
||||
},
|
||||
};
|
||||
}) ?? [])
|
||||
),
|
||||
functionCalling,
|
||||
stream: false,
|
||||
retry: {
|
||||
onValidationError: retriesLeft - 1,
|
||||
},
|
||||
}) as OutputCompositeResponse<string, ToolSchema | undefined, false>;
|
||||
}
|
||||
throw error;
|
||||
}
|
||||
);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
@@ -170,6 +170,7 @@ describe('bedrockClaudeAdapter', () => {
        ],
      },
      {
        name: 'my_function',
        role: MessageRole.Tool,
        toolCallId: '0',
        response: {

@@ -172,6 +172,7 @@ describe('geminiAdapter', () => {
        ],
      },
      {
        name: 'my_function',
        role: MessageRole.Tool,
        toolCallId: '0',
        response: {

@@ -142,6 +142,7 @@ describe('openAIAdapter', () => {
        ],
      },
      {
        name: 'my_function',
        role: MessageRole.Tool,
        toolCallId: '0',
        response: {

@@ -58,7 +58,6 @@ export const openAIAdapter: InferenceConnectorAdapter = {
      request = {
        stream,
        messages: messagesToOpenAI({ system: wrapped.system, messages: wrapped.messages }),
        temperature: 0,
      };
    } else {
      request = {

@@ -66,7 +65,6 @@
        messages: messagesToOpenAI({ system, messages }),
        tool_choice: toolChoiceToOpenAI(toolChoice),
        tools: toolsToOpenAI(tools),
        temperature: 0,
      };
    }

@@ -5,7 +5,7 @@
 * 2.0.
 */

import { last } from 'lodash';
import { last, omit } from 'lodash';
import { defer, switchMap, throwError } from 'rxjs';
import type { Logger } from '@kbn/logging';
import type { KibanaRequest } from '@kbn/core-http-server';
@@ -51,14 +51,26 @@ export function createChatCompleteApi({ request, actions, logger }: CreateChatCo
  const connectorType = connector.type;
  const inferenceAdapter = getInferenceAdapter(connectorType);

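  // Tool messages may carry internal-only `data` that is not sent to the LLM;
  // strip it before logging as well.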
  const messagesWithoutData = messages.map((message) => omit(message, 'data'));

  if (!inferenceAdapter) {
    return throwError(() =>
      createInferenceRequestError(`Adapter for type ${connectorType} not implemented`, 400)
    );
  }

  logger.debug(() => `Sending request: ${JSON.stringify(last(messages))}`);
  logger.trace(() => JSON.stringify({ messages, toolChoice, tools, system }));
  logger.debug(
    () => `Sending request, last message is: ${JSON.stringify(last(messagesWithoutData))}`
  );

  logger.trace(() =>
    JSON.stringify({
      messages: messagesWithoutData,
      toolChoice,
      tools,
      system,
    })
  );

  return inferenceAdapter.chatComplete({
    system,

@@ -44,7 +44,7 @@ export function createToolValidationError(
    name?: string;
    arguments?: string;
    errorsText?: string;
    toolCalls?: UnvalidatedToolCall[];
    toolCalls: UnvalidatedToolCall[];
  }
): ChatCompletionToolValidationError {
  return new InferenceTaskError(ChatCompletionErrorCode.ToolValidationError, message, meta);

@@ -79,7 +79,7 @@ export function wrapWithSimulatedFunctionCalling({
  };
}

const convertToolResponseMessage = (message: ToolMessage<unknown>): UserMessage => {
const convertToolResponseMessage = (message: ToolMessage): UserMessage => {
  return {
    role: MessageRole.User,
    content: JSON.stringify({
@@ -183,7 +183,7 @@ describe('chunksIntoMessage', () => {
    }

    await expect(async () => getMessage()).rejects.toThrowErrorMatchingInlineSnapshot(
      `"Tool call arguments for myFunction were invalid"`
      `"Tool call arguments for myFunction (001) were invalid"`
    );
  });

@@ -5,17 +5,17 @@
 * 2.0.
 */

import { last, map, merge, OperatorFunction, scan, share } from 'rxjs';
import type { Logger } from '@kbn/logging';
import {
  UnvalidatedToolCall,
  ToolOptions,
  ChatCompletionChunkEvent,
  ChatCompletionEventType,
  ChatCompletionMessageEvent,
  ChatCompletionTokenCountEvent,
  ToolOptions,
  UnvalidatedToolCall,
  withoutTokenCountEvents,
} from '@kbn/inference-common';
import type { Logger } from '@kbn/logging';
import { OperatorFunction, map, merge, share, toArray } from 'rxjs';
import { validateToolCalls } from '../../util/validate_tool_calls';

export function chunksIntoMessage<TToolOptions extends ToolOptions>({
@@ -37,38 +37,36 @@ export function chunksIntoMessage<TToolOptions extends ToolOptions>({
    shared$,
    shared$.pipe(
      withoutTokenCountEvents(),
      scan(
        (prev, chunk) => {
          prev.content += chunk.content ?? '';
      toArray(),
      map((chunks): ChatCompletionMessageEvent<TToolOptions> => {
        const concatenatedChunk = chunks.reduce(
          (prev, chunk) => {
            prev.content += chunk.content ?? '';

          chunk.tool_calls?.forEach((toolCall) => {
            let prevToolCall = prev.tool_calls[toolCall.index];
            if (!prevToolCall) {
              prev.tool_calls[toolCall.index] = {
                function: {
                  name: '',
                  arguments: '',
                },
                toolCallId: '',
              };
            chunk.tool_calls?.forEach((toolCall) => {
              let prevToolCall = prev.tool_calls[toolCall.index];
              if (!prevToolCall) {
                prev.tool_calls[toolCall.index] = {
                  function: {
                    name: '',
                    arguments: '',
                  },
                  toolCallId: '',
                };

              prevToolCall = prev.tool_calls[toolCall.index];
            }
                prevToolCall = prev.tool_calls[toolCall.index];
              }

          prevToolCall.function.name += toolCall.function.name;
          prevToolCall.function.arguments += toolCall.function.arguments;
          prevToolCall.toolCallId += toolCall.toolCallId;
        });
              prevToolCall.function.name += toolCall.function.name;
              prevToolCall.function.arguments += toolCall.function.arguments;
              prevToolCall.toolCallId += toolCall.toolCallId;
            });

          return prev;
        },
        { content: '', tool_calls: [] as UnvalidatedToolCall[] }
      );

            return prev;
          },
          {
            content: '',
            tool_calls: [] as UnvalidatedToolCall[],
          }
        ),
        last(),
        map((concatenatedChunk): ChatCompletionMessageEvent<TToolOptions> => {
          logger.debug(() => `Received completed message: ${JSON.stringify(concatenatedChunk)}`);

          const validatedToolCalls = validateToolCalls<TToolOptions>({
@@ -76,9 +76,11 @@ const chatCompleteBodySchema: Type<ChatCompleteRequestBody> = schema.object({
        name: schema.maybe(schema.string()),
      }),
      schema.object({
        name: schema.string(),
        role: schema.literal(MessageRole.Tool),
        toolCallId: schema.string(),
        response: schema.recordOf(schema.string(), schema.any()),
        data: schema.maybe(schema.recordOf(schema.string(), schema.any())),
      }),
    ])
  ),
@@ -34,6 +34,7 @@ export const generateEsqlTask = <TToolOptions extends ToolOptions>({
  docBase,
  functionCalling,
  logger,
  system,
}: {
  connectorId: string;
  systemMessage: string;

@@ -43,6 +44,7 @@ export const generateEsqlTask = <TToolOptions extends ToolOptions>({
  docBase: EsqlDocumentBase;
  functionCalling?: FunctionCallingMode;
  logger: Pick<Logger, 'debug'>;
  system?: string;
}) => {
  return function askLlmToRespond({
    documentationRequest: { commands, functions },

@@ -97,7 +99,7 @@ export const generateEsqlTask = <TToolOptions extends ToolOptions>({

      When converting queries from one language to ES|QL, make sure that the functions are available
      and documented in ES|QL. E.g., for SPL's LEN, use LENGTH. For IF, use CASE.
      `,
      ${system ? `## Additional instructions\n\n${system}` : ''}`,
      messages: [
        ...messages,
        {

@@ -106,6 +108,7 @@ export const generateEsqlTask = <TToolOptions extends ToolOptions>({
          toolCalls: [fakeRequestDocsToolCall],
        },
        {
          name: fakeRequestDocsToolCall.function.name,
          role: MessageRole.Tool,
          response: {
            documentation: requestedDocumentation,

@@ -21,6 +21,7 @@ export function naturalLanguageToEsql<TToolOptions extends ToolOptions>({
  toolChoice,
  logger,
  functionCalling,
  system,
  ...rest
}: NlToEsqlTaskParams<TToolOptions>): Observable<NlToEsqlTaskEvent<TToolOptions>> {
  return from(loadDocBase()).pipe(

@@ -41,6 +42,7 @@ export function naturalLanguageToEsql<TToolOptions extends ToolOptions>({
        tools,
        toolChoice,
      },
      system,
    });

    return requestDocumentation({

@@ -29,5 +29,6 @@ export type NlToEsqlTaskParams<TToolOptions extends ToolOptions> = {
  connectorId: string;
  logger: Pick<Logger, 'debug'>;
  functionCalling?: FunctionCallingMode;
  system?: string;
} & TToolOptions &
  ({ input: string } | { messages: Message[] });

@@ -108,7 +108,7 @@ describe('validateToolCalls', () => {
      });
    }
    expect(() => validate()).toThrowErrorMatchingInlineSnapshot(
      `"Tool call arguments for my_function were invalid"`
      `"Tool call arguments for my_function (1) were invalid"`
    );

    try {

@@ -119,6 +119,15 @@ describe('validateToolCalls', () => {
        arguments: JSON.stringify({ foo: 'bar' }),
        errorsText: `data must have required property 'bar'`,
        name: 'my_function',
        toolCalls: [
          {
            function: {
              arguments: JSON.stringify({ foo: 'bar' }),
              name: 'my_function',
            },
            toolCallId: '1',
          },
        ],
      });
    } else {
      fail('Expected toolValidationError');
@@ -54,11 +54,12 @@ export function validateToolCalls<TToolOptions extends ToolOptions>({

    if (!valid) {
      throw createToolValidationError(
        `Tool call arguments for ${toolCall.function.name} were invalid`,
        `Tool call arguments for ${toolCall.function.name} (${toolCall.toolCallId}) were invalid`,
        {
          name: toolCall.function.name,
          errorsText: validator.errorsText(),
          arguments: toolCall.function.arguments,
          toolCalls,
        }
      );
    }

@@ -51,6 +51,9 @@
    "@kbn/rule-data-utils",
    "@kbn/spaces-plugin",
    "@kbn/cloud-plugin",
    "@kbn/observability-utils-browser",
    "@kbn/observability-utils-server",
    "@kbn/observability-utils-common",
    "@kbn/storybook",
    "@kbn/dashboard-plugin",
    "@kbn/deeplinks-analytics",

@@ -17,6 +17,7 @@ import { SearchBar, IUnifiedSearchPluginServices } from '@kbn/unified-search-plu
import { KibanaContextProvider } from '@kbn/kibana-react-plugin/public';
import { merge } from 'lodash';
import { Storage } from '@kbn/kibana-utils-plugin/public';
import { of } from 'rxjs';
import type { EsqlQueryMeta } from '../public/services/esql';
import type { InvestigateAppServices } from '../public/services/types';
import { InvestigateAppKibanaContext } from '../public/hooks/use_kibana';

@@ -54,6 +55,10 @@ export function getMockInvestigateAppContext(): DeeplyMockedKeys<InvestigateAppK
      }),
    },
    charts: {} as any,
    investigateAppRepositoryClient: {
      fetch: jest.fn().mockImplementation(() => Promise.resolve()),
      stream: jest.fn().mockImplementation(() => of()) as any,
    },
  };

  const core = coreMock.createStart();

@@ -2,8 +2,8 @@
  "type": "plugin",
  "id": "@kbn/investigate-app-plugin",
  "owner": "@elastic/obs-ux-management-team",
  "group": "observability",
  "visibility": "private",
  "group": "observability",
  "plugin": {
    "id": "investigateApp",
    "server": true,

@@ -24,14 +24,22 @@
      "observability",
      "licensing",
      "ruleRegistry",
      "inference",
      "alerting",
      "spaces",
      "slo",
      "apmDataAccess",
      "usageCollection"
    ],
    "optionalPlugins": [
      "observabilityAIAssistant",
      "observabilityAIAssistantApp"
    ],
    "requiredBundles": [
      "esql",
      "kibanaReact",
      "kibanaUtils"
    ],
    "optionalPlugins": ["observabilityAIAssistant"],
    "extraPublicDirs": []
  }
}

@@ -11,7 +11,7 @@ import type {
  ReturnOf,
  RouteRepositoryClient,
} from '@kbn/server-route-repository';
import { formatRequest } from '@kbn/server-route-repository-utils/src/format_request';
import { createRepositoryClient } from '@kbn/server-route-repository-client';
import type { InvestigateAppServerRouteRepository } from '../../server';

type FetchOptions = Omit<HttpFetchOptions, 'body'> & {
@@ -25,15 +25,15 @@ export type InvestigateAppAPIClientOptions = Omit<
  signal: AbortSignal | null;
};

export type InvestigateAppAPIClient = RouteRepositoryClient<
export type InvestigateAppRepositoryClient = RouteRepositoryClient<
  InvestigateAppServerRouteRepository,
  InvestigateAppAPIClientOptions
>['fetch'];
>;

export type AutoAbortedInvestigateAppAPIClient = RouteRepositoryClient<
export type AutoAbortedInvestigateAppRepositoryClient = RouteRepositoryClient<
  InvestigateAppServerRouteRepository,
  Omit<InvestigateAppAPIClientOptions, 'signal'>
>['fetch'];
>;

export type InvestigateAppAPIEndpoint = keyof InvestigateAppServerRouteRepository;

@@ -45,19 +45,6 @@ export type APIReturnType<TEndpoint extends InvestigateAppAPIEndpoint> = ReturnO
export type InvestigateAppAPIClientRequestParamsOf<TEndpoint extends InvestigateAppAPIEndpoint> =
  ClientRequestParamsOf<InvestigateAppServerRouteRepository, TEndpoint>;

export function createCallInvestigateAppAPI(core: CoreStart | CoreSetup) {
  return ((endpoint, options) => {
    const { params } = options as unknown as {
      params?: Partial<Record<string, any>>;
    };

    const { method, pathname, version } = formatRequest(endpoint, params?.path);

    return core.http[method](pathname, {
      ...options,
      body: params && params.body ? JSON.stringify(params.body) : undefined,
      query: params?.query,
      version,
    });
  }) as InvestigateAppAPIClient;
export function createInvestigateAppRepositoryClient(core: CoreStart | CoreSetup) {
  return createRepositoryClient(core) as InvestigateAppRepositoryClient;
}
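The hunk above swaps the hand-rolled `fetch` wrapper for `createRepositoryClient`, and the exported type now covers the whole client rather than just `['fetch']`, so both `fetch` and `stream` are available. A usage sketch, assuming the client shape implied by the diff (endpoint and body fields are taken from the RCA route added in this PR; the values are illustrative):

```ts
import type { CoreStart } from '@kbn/core/public';
import { createInvestigateAppRepositoryClient } from './api';

// Sketch: open a server-sent-event stream against the new RCA endpoint.
export function startAnalysisStream(core: CoreStart, signal: AbortSignal) {
  const client = createInvestigateAppRepositoryClient(core);

  return client.stream('POST /internal/observability/investigation/root_cause_analysis', {
    params: {
      body: {
        investigationId: 'my-investigation', // illustrative values
        connectorId: 'my-connector',
        context: 'The user is investigating an alert.',
        rangeFrom: 'now-15m',
        rangeTo: 'now',
        serviceName: 'my-service',
        completeInBackground: true,
      },
    },
    signal,
  });
}
```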
@@ -4,19 +4,22 @@
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */
import dedent from 'dedent';
import {
  ALERT_RULE_PARAMETERS,
  ALERT_START,
  ALERT_RULE_CATEGORY,
  ALERT_REASON,
} from '@kbn/rule-data-utils';
import { i18n } from '@kbn/i18n';
import { EntityWithSource } from '@kbn/investigation-shared';
import React, { useCallback } from 'react';
import type { RootCauseAnalysisEvent } from '@kbn/observability-ai-server/root_cause_analysis';
import { EcsFieldsResponse } from '@kbn/rule-registry-plugin/common';
import React, { useState, useRef, useEffect } from 'react';
import { omit } from 'lodash';
import {
  ALERT_FLAPPING_HISTORY,
  ALERT_RULE_EXECUTION_TIMESTAMP,
  ALERT_RULE_EXECUTION_UUID,
  EVENT_ACTION,
  EVENT_KIND,
} from '@kbn/rule-registry-plugin/common/technical_rule_data_field_names';
import { isRequestAbortedError } from '@kbn/server-route-repository-client';
import { useKibana } from '../../../../hooks/use_kibana';
import { useInvestigation } from '../../contexts/investigation_context';
import { useFetchEntities } from '../../../../hooks/use_fetch_entities';
import { useUpdateInvestigation } from '../../../../hooks/use_update_investigation';

export interface InvestigationContextualInsight {
  key: string;

@@ -25,98 +28,177 @@ export interface InvestigationContextualInsight {
}

export function AssistantHypothesis({ investigationId }: { investigationId: string }) {
  const { alert } = useInvestigation();
  const {
    alert,
    globalParams: { timeRange },
    investigation,
  } = useInvestigation();
  const {
    core: { notifications },
    services: { investigateAppRepositoryClient },
    dependencies: {
      start: {
        observabilityAIAssistant: {
          ObservabilityAIAssistantContextualInsight,
          getContextualInsightMessages,
        },
        observabilityAIAssistant: { useGenAIConnectors },
        observabilityAIAssistantApp: { RootCauseAnalysisContainer },
      },
    },
  } = useKibana();
  const { data: entitiesData } = useFetchEntities({
    investigationId,
    serviceName: alert?.['service.name'] ? `${alert?.['service.name']}` : undefined,
    serviceEnvironment: alert?.['service.environment']
      ? `${alert?.['service.environment']}`
      : undefined,
    hostName: alert?.['host.name'] ? `${alert?.['host.name']}` : undefined,
    containerId: alert?.['container.id'] ? `${alert?.['container.id']}` : undefined,
  });

  const getAlertContextMessages = useCallback(async () => {
    if (!getContextualInsightMessages || !alert) {
      return [];
  const { mutateAsync: updateInvestigation } = useUpdateInvestigation();

  const { loading: loadingConnector, selectedConnector } = useGenAIConnectors();

  const serviceName = alert?.['service.name'] as string | undefined;

  const [events, setEvents] = useState<RootCauseAnalysisEvent[]>([]);
  const [loading, setLoading] = useState(false);
  const [error, setError] = useState<Error | undefined>(undefined);

  const controllerRef = useRef(new AbortController());

  useEffect(() => {
    if (investigation?.rootCauseAnalysis) {
      setEvents(investigation.rootCauseAnalysis.events);
    }
  }, [investigation?.rootCauseAnalysis]);

    const entities = entitiesData?.entities ?? [];
  const [completeInBackground, setCompleteInBackground] = useState(true);

    const entityContext = entities?.length
      ? `
    Alerts can optionally be associated with entities. Entities can be services, hosts, containers, or other resources. Entities can have metrics associated with them.

    The alert that triggered this investigation is associated with the following entities: ${entities
      .map((entity, index) => {
        return dedent(`
        ## Entity ${index + 1}:
        ${formatEntityMetrics(entity)};
        `);
      })
      .join('/n/n')}`
      : '';
  const runRootCauseAnalysis = ({
    alert: nonNullishAlert,
    connectorId,
    serviceName: nonNullishServiceName,
  }: {
    alert: EcsFieldsResponse;
    connectorId: string;
    serviceName: string;
  }) => {
    const rangeFrom = timeRange.from;

    return getContextualInsightMessages({
      message: `I am investigating a failure in my system. I was made aware of the failure by an alert and I am trying to understand the root cause of the issue.`,
      instructions: dedent(
        `I'm an SRE. I am investigating a failure in my system. I was made aware of the failure via an alert. Your current task is to help me identify the root cause of the failure in my system.
    const rangeTo = timeRange.to;

        The rule that triggered the alert is a ${
          alert[ALERT_RULE_CATEGORY]
        } rule. The alert started at ${alert[ALERT_START]}. The alert reason is ${
          alert[ALERT_REASON]
        }. The rule parameters are ${JSON.stringify(ALERT_RULE_PARAMETERS)}.
    setLoading(true);

        ${entityContext}
    setError(undefined);

        Based on the alert details, suggest a root cause and next steps to mitigate the issue.

        I do not have the alert details or entity details in front of me, so be sure to repeat the alert reason (${
          alert[ALERT_REASON]
        }), when the alert was triggered (${
          alert[ALERT_START]
        }), and the entity metrics in your response.
    setEvents([]);

        When displaying the entity metrics, please convert the metrics to a human-readable format. For example, convert "logRate" to "Log Rate" and "errorRate" to "Error Rate".
        `
      ),
    });
  }, [alert, getContextualInsightMessages, entitiesData?.entities]);
    investigateAppRepositoryClient
      .stream('POST /internal/observability/investigation/root_cause_analysis', {
        params: {
          body: {
            investigationId,
            connectorId,
            context: `The user is investigating an alert for the ${serviceName} service,
        and wants to find the root cause. Here is the alert:

  if (!ObservabilityAIAssistantContextualInsight) {
        ${JSON.stringify(sanitizeAlert(nonNullishAlert))}`,
            rangeFrom,
            rangeTo,
            serviceName: nonNullishServiceName,
            completeInBackground,
          },
        },
        signal: controllerRef.current.signal,
      })
      .subscribe({
        next: (event) => {
          setEvents((prev) => {
            return prev.concat(event.event);
          });
        },
        error: (nextError) => {
          if (!isRequestAbortedError(nextError)) {
            notifications.toasts.addError(nextError, {
              title: i18n.translate(
                'xpack.investigateApp.assistantHypothesis.failedToLoadAnalysis',
                {
                  defaultMessage: `Failed to load analysis`,
                }
              ),
            });
            setError(nextError);
          } else {
            setError(
              new Error(
                i18n.translate('xpack.investigateApp.assistantHypothesis.analysisAborted', {
                  defaultMessage: `Analysis was aborted`,
                })
              )
            );
          }

          setLoading(false);
        },
        complete: () => {
          setLoading(false);
        },
      });
  };

  if (!serviceName) {
    return null;
  }

  return alert && entitiesData ? (
    <ObservabilityAIAssistantContextualInsight
      title={i18n.translate(
        'xpack.investigateApp.assistantHypothesis.observabilityAIAssistantContextualInsight.helpMeInvestigateThisLabel',
        { defaultMessage: 'Help me investigate this failure' }
      )}
      messages={getAlertContextMessages}
  return (
    <RootCauseAnalysisContainer
      events={events}
      loading={loading || loadingConnector}
      completeInBackground={completeInBackground}
      onCompleteInBackgroundClick={() => {
        setCompleteInBackground(() => !completeInBackground);
      }}
      onStopAnalysisClick={() => {
        controllerRef.current.abort();
        controllerRef.current = new AbortController();
      }}
      onClearAnalysisClick={() => {
        setEvents([]);
        if (investigation?.rootCauseAnalysis) {
          updateInvestigation({
            investigationId,
            payload: {
              rootCauseAnalysis: {
                events: [],
              },
            },
          });
        }
      }}
      onResetAnalysisClick={() => {
        controllerRef.current.abort();
        controllerRef.current = new AbortController();
        if (alert && selectedConnector && serviceName) {
          runRootCauseAnalysis({
            alert,
            connectorId: selectedConnector,
            serviceName,
          });
        }
      }}
      error={error}
      onStartAnalysisClick={() => {
        if (alert && selectedConnector && serviceName) {
          runRootCauseAnalysis({
            alert,
            connectorId: selectedConnector,
            serviceName,
          });
        }
      }}
    />
  ) : null;
  );
}

function sanitizeAlert(alert: EcsFieldsResponse) {
  return omit(
    alert,
    ALERT_RULE_EXECUTION_TIMESTAMP,
    '_index',
    ALERT_FLAPPING_HISTORY,
    EVENT_ACTION,
    EVENT_KIND,
    ALERT_RULE_EXECUTION_UUID,
    '@timestamp'
  );
}
const formatEntityMetrics = (entity: EntityWithSource): string => {
  const entityMetrics = Object.entries(entity.metrics)
    .map(([key, value]) => `${key}: ${value}`)
    .join(', ');
  const entitySources = entity.sources.map((source) => source.dataStream).join(', ');
  return dedent(`
    Entity name: ${entity.display_name};
    Entity type: ${entity.type};
    Entity metrics: ${entityMetrics};
    Entity data streams: ${entitySources}
  `);
};
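The component stops and restarts the analysis by aborting the current controller and immediately replacing it, so a stale signal is never reused. A minimal sketch of that pattern in isolation:

```ts
// Each run owns one AbortController; stopping aborts the in-flight SSE
// request, and a fresh controller is installed for the next run.
let controller = new AbortController();

function stopAnalysis() {
  controller.abort();                 // cancels the streamed request
  controller = new AbortController(); // clean signal for the next run
}

function startAnalysis(run: (signal: AbortSignal) => void) {
  run(controller.signal); // e.g. pass to investigateAppRepositoryClient.stream
}
```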
@@ -27,6 +27,7 @@ import type {
  InvestigateAppSetupDependencies,
  InvestigateAppStartDependencies,
} from './types';
import { createInvestigateAppRepositoryClient, InvestigateAppRepositoryClient } from './api';

const getCreateEsqlService = once(() => import('./services/esql').then((m) => m.createEsqlService));

@@ -41,6 +42,7 @@ export class InvestigateAppPlugin
{
  logger: Logger;
  config: ConfigSchema;
  repositoryClient!: InvestigateAppRepositoryClient;

  constructor(context: PluginInitializerContext<ConfigSchema>) {
    this.logger = context.logger.get();

@@ -51,6 +53,8 @@ export class InvestigateAppPlugin
    coreSetup: CoreSetup<InvestigateAppStartDependencies, InvestigateAppPublicStart>,
    pluginsSetup: InvestigateAppSetupDependencies
  ): InvestigateAppPublicSetup {
    this.repositoryClient = createInvestigateAppRepositoryClient(coreSetup);

    coreSetup.application.register({
      id: INVESTIGATE_APP_ID,
      title: i18n.translate('xpack.investigateApp.appTitle', {

@@ -93,6 +97,7 @@ export class InvestigateAppPlugin
        lens: pluginsStart.lens,
      }),
      charts: pluginsStart.charts,
      investigateAppRepositoryClient: this.repositoryClient,
    };

    ReactDOM.render(

@@ -127,6 +132,7 @@ export class InvestigateAppPlugin
        start: pluginsStart,
      },
      services: {
        investigateAppRepositoryClient: this.repositoryClient,
        esql: createEsqlService({
          data: pluginsStart.data,
          dataViews: pluginsStart.dataViews,
@@ -7,8 +7,10 @@

import { ChartsPluginStart } from '@kbn/charts-plugin/public';
import type { EsqlService } from './esql';
import type { InvestigateAppRepositoryClient } from '../api';

export interface InvestigateAppServices {
  esql: EsqlService;
  charts: ChartsPluginStart;
  investigateAppRepositoryClient: InvestigateAppRepositoryClient;
}
@@ -8,6 +8,10 @@ import type {
  ObservabilityAIAssistantPublicSetup,
  ObservabilityAIAssistantPublicStart,
} from '@kbn/observability-ai-assistant-plugin/public';
import type {
  ObservabilityAIAssistantAppPublicSetup,
  ObservabilityAIAssistantAppPublicStart,
} from '@kbn/observability-ai-assistant-app-plugin/public';
import { ChartsPluginStart } from '@kbn/charts-plugin/public';
import type { ContentManagementPublicStart } from '@kbn/content-management-plugin/public';
import type { DataPublicPluginSetup, DataPublicPluginStart } from '@kbn/data-plugin/public';

@@ -43,6 +47,7 @@ export interface InvestigateAppSetupDependencies {
  investigate: InvestigatePublicSetup;
  observabilityShared: ObservabilitySharedPluginSetup;
  observabilityAIAssistant: ObservabilityAIAssistantPublicSetup;
  observabilityAIAssistantApp: ObservabilityAIAssistantAppPublicSetup;
  lens: LensPublicSetup;
  dataViews: DataViewsPublicPluginSetup;
  data: DataPublicPluginSetup;

@@ -58,6 +63,7 @@ export interface InvestigateAppStartDependencies {
  investigate: InvestigatePublicStart;
  observabilityShared: ObservabilitySharedPluginStart;
  observabilityAIAssistant: ObservabilityAIAssistantPublicStart;
  observabilityAIAssistantApp: ObservabilityAIAssistantAppPublicStart;
  lens: LensPublicStart;
  dataViews: DataViewsPublicPluginStart;
  data: DataPublicPluginStart;
@@ -15,18 +15,19 @@ import {
  findInvestigationsParamsSchema,
  getAllInvestigationStatsParamsSchema,
  getAllInvestigationTagsParamsSchema,
  getEntitiesParamsSchema,
  GetEntitiesResponse,
  getEventsParamsSchema,
  GetEventsResponse,
  getInvestigationItemsParamsSchema,
  getInvestigationNotesParamsSchema,
  getInvestigationParamsSchema,
  updateInvestigationItemParamsSchema,
  updateInvestigationNoteParamsSchema,
  updateInvestigationParamsSchema,
  getEventsParamsSchema,
  GetEventsResponse,
  getEntitiesParamsSchema,
  GetEntitiesResponse,
} from '@kbn/investigation-shared';
import { ScopedAnnotationsClient } from '@kbn/observability-plugin/server';
import { createEntitiesESClient } from '../clients/create_entities_es_client';
import { createInvestigation } from '../services/create_investigation';
import { createInvestigationItem } from '../services/create_investigation_item';
import { createInvestigationNote } from '../services/create_investigation_note';

@@ -34,20 +35,20 @@ import { deleteInvestigation } from '../services/delete_investigation';
import { deleteInvestigationItem } from '../services/delete_investigation_item';
import { deleteInvestigationNote } from '../services/delete_investigation_note';
import { findInvestigations } from '../services/find_investigations';
import { AlertsClient, getAlertsClient } from '../services/get_alerts_client';
import { getAllInvestigationStats } from '../services/get_all_investigation_stats';
import { getAllInvestigationTags } from '../services/get_all_investigation_tags';
import { getEntitiesWithSource } from '../services/get_entities';
import { getAlertEvents, getAnnotationEvents } from '../services/get_events';
import { getInvestigation } from '../services/get_investigation';
import { getInvestigationItems } from '../services/get_investigation_items';
import { getInvestigationNotes } from '../services/get_investigation_notes';
import { investigationRepositoryFactory } from '../services/investigation_repository';
import { updateInvestigation } from '../services/update_investigation';
import { getAlertEvents, getAnnotationEvents } from '../services/get_events';
import { AlertsClient, getAlertsClient } from '../services/get_alerts_client';
import { updateInvestigationItem } from '../services/update_investigation_item';
import { updateInvestigationNote } from '../services/update_investigation_note';
import { createInvestigateAppServerRoute } from './create_investigate_app_server_route';
import { getAllInvestigationStats } from '../services/get_all_investigation_stats';
import { getEntitiesWithSource } from '../services/get_entities';
import { createEntitiesESClient } from '../clients/create_entities_es_client';
import { rootCauseAnalysisRoute } from './rca/route';

const createInvestigationRoute = createInvestigateAppServerRoute({
  endpoint: 'POST /api/observability/investigations 2023-10-31',

@@ -400,6 +401,7 @@ export function getGlobalInvestigateAppServerRouteRepository() {
    ...getEntitiesRoute,
    ...getAllInvestigationStatsRoute,
    ...getAllInvestigationTagsRoute,
    ...rootCauseAnalysisRoute,
  };
}
@@ -0,0 +1,163 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { Observable, catchError, from, of, share, switchMap, toArray } from 'rxjs';
import { ServerSentEventBase } from '@kbn/sse-utils';
import {
  RootCauseAnalysisEvent,
  runRootCauseAnalysis,
} from '@kbn/observability-ai-server/root_cause_analysis';
import { z } from '@kbn/zod';
import datemath from '@elastic/datemath';
import { OBSERVABILITY_LOGS_DATA_ACCESS_LOG_SOURCES_ID } from '@kbn/management-settings-ids';
import { createObservabilityEsClient } from '@kbn/observability-utils-server/es/client/create_observability_es_client';
import { preconditionFailed } from '@hapi/boom';
import { createInvestigateAppServerRoute } from '../create_investigate_app_server_route';
import { investigationRepositoryFactory } from '../../services/investigation_repository';

export const rootCauseAnalysisRoute = createInvestigateAppServerRoute({
  endpoint: 'POST /internal/observability/investigation/root_cause_analysis',
  options: {
    tags: [],
  },
  params: z.object({
    body: z.object({
      investigationId: z.string(),
      rangeFrom: z.string(),
      rangeTo: z.string(),
      serviceName: z.string(),
      context: z.string(),
      connectorId: z.string(),
      completeInBackground: z.boolean().optional(),
    }),
  }),
  handler: async ({
    params,
    plugins,
    request,
    context: requestContext,
    logger,
  }): Promise<Observable<ServerSentEventBase<'event', { event: RootCauseAnalysisEvent }>>> => {
    const {
      body: {
        investigationId,
        context,
        rangeFrom,
        rangeTo,
        serviceName,
        connectorId,
        completeInBackground,
      },
    } = params;

    if (!plugins.observabilityAIAssistant) {
      throw preconditionFailed('Observability AI Assistant plugin is not available');
    }

    const start = datemath.parse(rangeFrom)?.valueOf()!;
    const end = datemath.parse(rangeTo)?.valueOf()!;

    const coreContext = await requestContext.core;

    const coreEsClient = coreContext.elasticsearch.client.asCurrentUser;
    const soClient = coreContext.savedObjects.client;
    const uiSettingsClient = coreContext.uiSettings.client;

    const repository = investigationRepositoryFactory({ soClient, logger });

    const esClient = createObservabilityEsClient({
      client: coreEsClient,
      logger,
      plugin: 'investigateApp',
    });

    const [
      investigation,
      rulesClient,
      alertsClient,
      inferenceClient,
      observabilityAIAssistantClient,
      spaceId = 'default',
      apmIndices,
      logSources,
      sloSummaryIndices,
    ] = await Promise.all([
      repository.findById(investigationId),
      (await plugins.alerting.start()).getRulesClientWithRequest(request),
      (await plugins.ruleRegistry.start()).getRacClientWithRequest(request),
      (await plugins.inference.start()).getClient({ request }),
      plugins
        .observabilityAIAssistant!.start()
        .then((observabilityAIAssistantStart) =>
          observabilityAIAssistantStart.service.getClient({ request, scopes: ['observability'] })
        ),
      (await plugins.spaces?.start())?.spacesService.getSpaceId(request),
      plugins.apmDataAccess.setup.getApmIndices(soClient),
      uiSettingsClient.get(OBSERVABILITY_LOGS_DATA_ACCESS_LOG_SOURCES_ID) as Promise<string[]>,
      (await plugins.slo.start()).getSloClientWithRequest(request).getSummaryIndices(),
    ]);

    const next$ = runRootCauseAnalysis({
      alertsClient,
      connectorId,
      start,
      end,
      esClient,
      inferenceClient,
      indices: {
        logs: logSources,
        traces: [apmIndices.span, apmIndices.error, apmIndices.transaction],
        sloSummaries: sloSummaryIndices,
      },
      rulesClient,
      observabilityAIAssistantClient,
      serviceName,
      spaceId,
      context,
      logger,
    }).pipe(
      switchMap((event) => {
        return of({
          type: 'event' as const,
          event,
        });
      })
    );

    if (completeInBackground) {
      const shared$ = next$.pipe(share());

      shared$
        .pipe(
          toArray(),
          catchError(() => {
            return of();
          }),
          switchMap((events) => {
            return from(
              repository.save({
                ...investigation,
                rootCauseAnalysis: {
                  events: events.map(({ event }) => event),
                },
              })
            );
          })
        )
        .subscribe({
          error: (error) => {
            logger.error(`Failed to update investigation: ${error.message}`);
            logger.error(error);
          },
        });

      return shared$;
    }

    return next$;
  },
});
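When `completeInBackground` is set, the route multicasts the event stream with `share()`: the HTTP response consumes one subscription while a second subscription buffers every event with `toArray()` and saves them to the investigation once the analysis completes, which is what lets the user close the browser and come back to a full investigation. A stripped-down sketch of the same pattern (`save` stands in for `repository.save`):

```ts
import { Observable, from, share, switchMap, toArray } from 'rxjs';

// share() multicasts the cold analysis stream so the response subscriber and
// the persistence subscriber observe the same run instead of re-running it.
function withBackgroundPersistence<T>(
  source$: Observable<T>,
  save: (events: T[]) => Promise<void>
): Observable<T> {
  const shared$ = source$.pipe(share());

  shared$
    .pipe(
      toArray(), // buffer everything until the stream completes
      switchMap((events) => from(save(events)))
    )
    .subscribe({
      error: (error) => {
        // Persistence failures are logged server-side, not sent to the client.
        console.error('Failed to persist analysis events', error);
      },
    });

  return shared$;
}
```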
@@ -5,11 +5,23 @@
 * 2.0.
 */

import { ObservabilityPluginSetup } from '@kbn/observability-plugin/server';
import {
import type { ObservabilityPluginSetup } from '@kbn/observability-plugin/server';
import type {
  RuleRegistryPluginSetupContract,
  RuleRegistryPluginStartContract,
} from '@kbn/rule-registry-plugin/server';
import type { AlertingServerSetup, AlertingServerStart } from '@kbn/alerting-plugin/server/plugin';
import type { SLOServerStart, SLOServerSetup } from '@kbn/slo-plugin/server';
import type { InferenceServerStart, InferenceServerSetup } from '@kbn/inference-plugin/server';
import type { SpacesPluginSetup, SpacesPluginStart } from '@kbn/spaces-plugin/server';
import type {
  ApmDataAccessPluginStart,
  ApmDataAccessPluginSetup,
} from '@kbn/apm-data-access-plugin/server';
import type {
  ObservabilityAIAssistantServerStart,
  ObservabilityAIAssistantServerSetup,
} from '@kbn/observability-ai-assistant-plugin/server';
import { UsageCollectionSetup } from '@kbn/usage-collection-plugin/server';

/* eslint-disable @typescript-eslint/no-empty-interface*/

@@ -19,11 +31,23 @@ export interface ConfigSchema {}
export interface InvestigateAppSetupDependencies {
  observability: ObservabilityPluginSetup;
  ruleRegistry: RuleRegistryPluginSetupContract;
  slo: SLOServerSetup;
  alerting: AlertingServerSetup;
  inference: InferenceServerSetup;
  spaces?: SpacesPluginSetup;
  apmDataAccess: ApmDataAccessPluginSetup;
  observabilityAIAssistant?: ObservabilityAIAssistantServerSetup;
  usageCollection: UsageCollectionSetup;
}

export interface InvestigateAppStartDependencies {
  ruleRegistry: RuleRegistryPluginStartContract;
  slo: SLOServerStart;
  alerting: AlertingServerStart;
  inference: InferenceServerStart;
  spaces?: SpacesPluginStart;
  apmDataAccess: ApmDataAccessPluginStart;
  observabilityAIAssistant?: ObservabilityAIAssistantServerStart;
}

export interface InvestigateAppServerSetup {}
@@ -17,57 +17,67 @@
    ".storybook/**/*.js"
  ],
  "kbn_references": [
    "@kbn/esql",
    "@kbn/core",
    "@kbn/data-views-plugin",
    "@kbn/expressions-plugin",
    "@kbn/kibana-utils-plugin",
    "@kbn/utility-types-jest",
    "@kbn/es-types",
    "@kbn/data-plugin",
    "@kbn/embeddable-plugin",
    "@kbn/unified-search-plugin",
    "@kbn/kibana-react-plugin",
    "@kbn/server-route-repository",
    "@kbn/server-route-repository-client",
    "@kbn/react-kibana-context-theme",
    "@kbn/shared-ux-link-redirect-app",
    "@kbn/kibana-react-plugin",
    "@kbn/i18n",
    "@kbn/embeddable-plugin",
    "@kbn/observability-ai-assistant-plugin",
    "@kbn/lens-plugin",
    "@kbn/esql",
    "@kbn/esql-utils",
    "@kbn/data-plugin",
    "@kbn/es-types",
    "@kbn/field-types",
    "@kbn/expressions-plugin",
    "@kbn/deeplinks-observability",
    "@kbn/logging",
    "@kbn/data-views-plugin",
    "@kbn/observability-shared-plugin",
    "@kbn/config-schema",
    "@kbn/investigate-plugin",
    "@kbn/dataset-quality-plugin",
    "@kbn/utility-types-jest",
    "@kbn/content-management-plugin",
    "@kbn/kibana-utils-plugin",
    "@kbn/visualization-utils",
    "@kbn/unified-search-plugin",
    "@kbn/es-query",
    "@kbn/server-route-repository",
    "@kbn/security-plugin",
    "@kbn/ui-actions-plugin",
    "@kbn/server-route-repository-utils",
    "@kbn/core-saved-objects-server",
    "@kbn/rule-registry-plugin",
    "@kbn/shared-ux-router",
    "@kbn/i18n",
    "@kbn/investigation-shared",
    "@kbn/core-security-common",
    "@kbn/saved-objects-finder-plugin",
    "@kbn/presentation-containers",
    "@kbn/lens-plugin",
    "@kbn/rule-registry-plugin",
    "@kbn/security-plugin",
    "@kbn/rule-data-utils",
    "@kbn/investigate-plugin",
    "@kbn/observability-utils-browser",
    "@kbn/lens-embeddable-utils",
    "@kbn/i18n-react",
    "@kbn/zod",
    "@kbn/observability-plugin",
    "@kbn/licensing-plugin",
    "@kbn/rule-data-utils",
    "@kbn/es-query",
    "@kbn/saved-objects-finder-plugin",
    "@kbn/presentation-containers",
    "@kbn/observability-ai-server",
    "@kbn/charts-plugin",
    "@kbn/observability-shared-plugin",
    "@kbn/core-security-common",
    "@kbn/deeplinks-observability",
    "@kbn/logging",
    "@kbn/esql-utils",
    "@kbn/observability-ai-assistant-plugin",
    "@kbn/observability-ai-assistant-app-plugin",
    "@kbn/content-management-plugin",
    "@kbn/dataset-quality-plugin",
    "@kbn/ui-actions-plugin",
    "@kbn/field-types",
    "@kbn/entities-schema",
    "@kbn/core-elasticsearch-server",
    "@kbn/observability-plugin",
    "@kbn/config-schema",
    "@kbn/visualization-utils",
    "@kbn/usage-collection-plugin",
    "@kbn/calculate-auto",
    "@kbn/ml-random-sampler-utils",
    "@kbn/charts-plugin",
    "@kbn/observability-utils-browser",
    "@kbn/usage-collection-plugin",
    "@kbn/zod",
    "@kbn/inference-common",
    "@kbn/core-elasticsearch-server",
    "@kbn/sse-utils",
    "@kbn/management-settings-ids",
    "@kbn/observability-utils-server",
    "@kbn/licensing-plugin",
    "@kbn/core-saved-objects-server",
    "@kbn/alerting-plugin",
    "@kbn/slo-plugin",
    "@kbn/inference-plugin",
    "@kbn/spaces-plugin",
    "@kbn/apm-data-access-plugin",
  ],
}
@@ -52,6 +52,7 @@ export function convertMessagesForInference(messages: Message[]): InferenceMessa
  }

  inferenceMessages.push({
    name: message.message.name!,
    role: InferenceMessageRole.Tool,
    response: JSON.parse(message.message.content ?? '{}'),
    toolCallId: toolCallRequest.toolCalls![0].toolCallId,
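This hunk carries the tool `name` through to the inference message (per the PR notes, so a tool response's type can be inferred from its tool name) alongside the existing `toolCallId`. An illustration of the resulting message shape; the import path and all field values here are assumptions for the example:

```ts
import { InferenceMessageRole } from '@kbn/inference-common';

// One converted tool message: `name` identifies the tool, `toolCallId` ties
// the response back to the originating tool call.
const toolMessage = {
  name: 'get_alerts',                 // hypothetical tool name
  role: InferenceMessageRole.Tool,
  response: { alerts: [], total: 0 }, // JSON.parse of the message content
  toolCallId: 'call_abc123',
};
```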
Some files were not shown because too many files have changed in this diff.