# Backport

This will backport the following commits from `main` to `8.x`:

- [[RCA] AI-assisted root cause analysis (#197200)](https://github.com/elastic/kibana/pull/197200)

<!--- Backport version: 7.3.2 -->

### Questions?

Please refer to the [Backport tool documentation](https://github.com/sqren/backport)
Parent: 10e01b4be4
Commit: b3ba62a972

144 changed files with 27287 additions and 358 deletions
@@ -919,6 +919,7 @@ module.exports = {
         'x-pack/plugins/observability_solution/exploratory_view/**/*.{js,mjs,ts,tsx}',
         'x-pack/plugins/observability_solution/ux/**/*.{js,mjs,ts,tsx}',
         'x-pack/plugins/observability_solution/slo/**/*.{js,mjs,ts,tsx}',
+        'x-pack/packages/observability/**/*.{js,mjs,ts,tsx}',
       ],
       rules: {
         'no-console': ['warn', { allow: ['error'] }],

@@ -938,6 +939,7 @@ module.exports = {
         'x-pack/plugins/observability_solution/observability/**/*.stories.*',
         'x-pack/plugins/observability_solution/exploratory_view/**/*.stories.*',
         'x-pack/plugins/observability_solution/slo/**/*.stories.*',
+        'x-pack/packages/observability/**/*.{js,mjs,ts,tsx}',
       ],
       rules: {
         'react/function-component-definition': [
.github/CODEOWNERS (vendored, 2 changes)

@@ -662,6 +662,8 @@ packages/kbn-object-versioning-utils @elastic/appex-sharedux
 x-pack/plugins/observability_solution/observability_ai_assistant_app @elastic/obs-ai-assistant
 x-pack/plugins/observability_solution/observability_ai_assistant_management @elastic/obs-ai-assistant
 x-pack/plugins/observability_solution/observability_ai_assistant @elastic/obs-ai-assistant
+x-pack/packages/observability/observability_ai/observability_ai_common @elastic/obs-ai-assistant
+x-pack/packages/observability/observability_ai/observability_ai_server @elastic/obs-ai-assistant
 x-pack/packages/observability/alert_details @elastic/obs-ux-management-team
 x-pack/packages/observability/alerting_rule_utils @elastic/obs-ux-management-team
 x-pack/packages/observability/alerting_test_data @elastic/obs-ux-management-team
@@ -693,6 +693,8 @@
     "@kbn/observability-ai-assistant-app-plugin": "link:x-pack/plugins/observability_solution/observability_ai_assistant_app",
     "@kbn/observability-ai-assistant-management-plugin": "link:x-pack/plugins/observability_solution/observability_ai_assistant_management",
     "@kbn/observability-ai-assistant-plugin": "link:x-pack/plugins/observability_solution/observability_ai_assistant",
+    "@kbn/observability-ai-common": "link:x-pack/packages/observability/observability_ai/observability_ai_common",
+    "@kbn/observability-ai-server": "link:x-pack/packages/observability/observability_ai/observability_ai_server",
     "@kbn/observability-alert-details": "link:x-pack/packages/observability/alert_details",
     "@kbn/observability-alerting-rule-utils": "link:x-pack/packages/observability/alerting_rule_utils",
     "@kbn/observability-alerting-test-data": "link:x-pack/packages/observability/alerting_test_data",

@@ -1143,6 +1145,7 @@
     "fnv-plus": "^1.3.1",
+    "formik": "^2.4.6",
     "fp-ts": "^2.3.1",
     "fuse.js": "^7.0.0",
     "get-port": "^5.0.0",
     "getopts": "^2.2.5",
     "getos": "^3.1.0",
@@ -13,6 +13,7 @@ export type {
   SearchHit,
   ESSearchResponse,
   ESSearchRequest,
+  ESSearchRequestWithoutBody,
   ESSourceOptions,
   InferSearchResponseOf,
   AggregationResultOf,
@@ -8,6 +8,7 @@
  */

 import * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
+import * as estypesWithoutBody from '@elastic/elasticsearch/lib/api/types';
 import type {
   Field,
   QueryDslFieldAndFormat,

@@ -26,6 +27,7 @@ import {

 export type ESFilter = estypes.QueryDslQueryContainer;
 export type ESSearchRequest = estypes.SearchRequest;
+export type ESSearchRequestWithoutBody = estypesWithoutBody.SearchRequest;
 export type AggregationOptionsByType = Required<estypes.AggregationsAggregationContainer>;

 // Typings for Elasticsearch queries and aggregations. These are intended to be
@@ -23,20 +23,15 @@ type InvalidAggregationRequest = unknown;
 // Union keys are not included in keyof, but extends iterates over the types in a union.
 type ValidAggregationKeysOf<T extends Record<string, any>> = T extends T ? keyof T : never;

-type KeyOfSource<T> = Record<
-  keyof T,
-  (T extends Record<string, { terms: { missing_bucket: true } }> ? null : never) | string | number
->;
+type KeyOfSource<T> = {
+  [key in keyof T]:
+    | (T[key] extends Record<string, { terms: { missing_bucket: true } }> ? null : never)
+    | string
+    | number;
+};

-type KeysOfSources<T extends any[]> = T extends [any]
-  ? KeyOfSource<T[0]>
-  : T extends [any, any]
-  ? KeyOfSource<T[0]> & KeyOfSource<T[1]>
-  : T extends [any, any, any]
-  ? KeyOfSource<T[0]> & KeyOfSource<T[1]> & KeyOfSource<T[2]>
-  : T extends [any, any, any, any]
-  ? KeyOfSource<T[0]> & KeyOfSource<T[1]> & KeyOfSource<T[2]> & KeyOfSource<T[3]>
-  : Record<string, null | string | number>;
+// convert to intersection to be able to get all the keys
+type KeysOfSources<T extends any[]> = UnionToIntersection<KeyOfSource<ValuesType<Pick<T, number>>>>;

 type CompositeKeysOf<TAggregationContainer extends AggregationsAggregationContainer> =
   TAggregationContainer extends {
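An editorial aside on the type-level change above: the old `KeysOfSources` enumerated tuple arities by hand, while the new one folds the tuple into a single intersection. A minimal sketch of that trick, assuming `UnionToIntersection` and `ValuesType` behave as in the `utility-types` package (the import sits outside this hunk):

```ts
import { UnionToIntersection, ValuesType } from 'utility-types';

// Hypothetical composite sources, as a tuple type:
type Sources = [{ service: string }, { host: string }];

// Pick<Sources, number> keeps the element types; ValuesType turns them into a
// union: { service: string } | { host: string }
type SourceUnion = ValuesType<Pick<Sources, number>>;

// UnionToIntersection folds that union into one object type containing every
// key - { service: string } & { host: string } - for any tuple length.
type Keys = UnionToIntersection<SourceUnion>;

const key: Keys = { service: 'cart', host: 'host-1' }; // type-checks
```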
@@ -24,6 +24,9 @@ const updateInvestigationParamsSchema = z.object({
       }),
       tags: z.array(z.string()),
       externalIncidentUrl: z.string().nullable(),
+      rootCauseAnalysis: z.object({
+        events: z.array(z.any()),
+      }),
     })
     .partial(),
 });
@@ -35,6 +35,11 @@ const investigationSchema = z.object({
   notes: z.array(investigationNoteSchema),
   items: z.array(investigationItemSchema),
   externalIncidentUrl: z.string().nullable(),
+  rootCauseAnalysis: z
+    .object({
+      events: z.array(z.any()),
+    })
+    .optional(),
 });

 type Status = z.infer<typeof statusSchema>;
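To make the effect of `.optional()` concrete, here is a hedged sketch with the schema trimmed to the two fields visible in this hunk (everything else omitted):

```ts
import { z } from 'zod';

// Trimmed-down reproduction of the schema above (illustrative only):
const investigationSchema = z.object({
  externalIncidentUrl: z.string().nullable(),
  rootCauseAnalysis: z.object({ events: z.array(z.any()) }).optional(),
});

// Stored investigations created before this change still parse:
investigationSchema.parse({ externalIncidentUrl: null });

// Investigations with a persisted analysis keep their events:
investigationSchema.parse({
  externalIncidentUrl: 'https://example.com/incidents/42',
  rootCauseAnalysis: { events: [{ type: 'alert' }] },
});
```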
@@ -98,8 +98,15 @@ export function registerRoutes<TDependencies extends Record<string, any>>({
       if (isKibanaResponse(result)) {
         return result;
       } else if (isObservable(result)) {
+        const controller = new AbortController();
+        request.events.aborted$.subscribe(() => {
+          controller.abort();
+        });
         return response.ok({
-          body: observableIntoEventSourceStream(result as Observable<ServerSentEvent>),
+          body: observableIntoEventSourceStream(result as Observable<ServerSentEvent>, {
+            logger,
+            signal: controller.signal,
+          }),
         });
       } else {
         const body = result || {};
@@ -0,0 +1,198 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the "Elastic License
 * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
 * Public License v 1"; you may not use this file except in compliance with, at
 * your election, the "Elastic License 2.0", the "GNU Affero General Public
 * License v3.0 only", or the "Server Side Public License, v 1".
 */

import { Logger } from '@kbn/logging';
import { observableIntoEventSourceStream } from './observable_into_event_source_stream';
import { PassThrough } from 'node:stream';
import { Subject } from 'rxjs';
import { ServerSentEvent, ServerSentEventType } from '@kbn/sse-utils/src/events';
import {
  ServerSentEventErrorCode,
  createSSEInternalError,
  createSSERequestError,
} from '@kbn/sse-utils/src/errors';

describe('observableIntoEventSourceStream', () => {
  let logger: jest.Mocked<Logger>;

  let controller: AbortController;

  let stream: PassThrough;
  let source$: Subject<ServerSentEvent>;

  let data: string[];

  beforeEach(() => {
    jest.useFakeTimers();
    logger = {
      debug: jest.fn(),
      error: jest.fn(),
    } as unknown as jest.Mocked<Logger>;

    controller = new AbortController();
    source$ = new Subject();
    data = [];

    stream = observableIntoEventSourceStream(source$, { logger, signal: controller.signal });
    stream.on('data', (chunk) => {
      data.push(chunk.toString());
    });
  });

  afterEach(() => {
    jest.clearAllTimers();
  });

  it('writes events into the stream in SSE format', () => {
    source$.next({ type: ServerSentEventType.data, data: { foo: 'bar' } });
    source$.complete();

    jest.runAllTimers();

    expect(data).toEqual(['event: data\ndata: {"data":{"foo":"bar"}}\n\n']);
  });

  it('handles SSE errors', () => {
    const sseError = createSSEInternalError('Invalid input');

    source$.error(sseError);

    jest.runAllTimers();

    expect(logger.error).toHaveBeenCalledWith(sseError);
    expect(logger.debug).toHaveBeenCalled();
    const debugFn = logger.debug.mock.calls[0][0] as () => string;
    const loggedError = JSON.parse(debugFn());
    expect(loggedError).toEqual({
      type: 'error',
      error: {
        code: ServerSentEventErrorCode.internalError,
        message: 'Invalid input',
        meta: {},
      },
    });

    expect(data).toEqual([
      `event: error\ndata: ${JSON.stringify({
        error: {
          code: ServerSentEventErrorCode.internalError,
          message: 'Invalid input',
          meta: {},
        },
      })}\n\n`,
    ]);
  });

  it('handles SSE errors with metadata', () => {
    const sseError = createSSERequestError('Invalid request', 400);

    source$.error(sseError);

    jest.runAllTimers();

    expect(logger.error).toHaveBeenCalledWith(sseError);
    expect(logger.debug).toHaveBeenCalled();
    const debugFn = logger.debug.mock.calls[0][0] as () => string;
    const loggedError = JSON.parse(debugFn());
    expect(loggedError).toEqual({
      type: 'error',
      error: {
        code: ServerSentEventErrorCode.requestError,
        message: 'Invalid request',
        meta: {
          status: 400,
        },
      },
    });

    expect(data).toEqual([
      `event: error\ndata: ${JSON.stringify({
        error: {
          code: ServerSentEventErrorCode.requestError,
          message: 'Invalid request',
          meta: {
            status: 400,
          },
        },
      })}\n\n`,
    ]);
  });

  it('handles non-SSE errors', () => {
    const error = new Error('Non-SSE Error');

    source$.error(error);

    jest.runAllTimers();

    expect(logger.error).toHaveBeenCalledWith(error);
    expect(data).toEqual([
      `event: error\ndata: ${JSON.stringify({
        error: {
          code: ServerSentEventErrorCode.internalError,
          message: 'Non-SSE Error',
        },
      })}\n\n`,
    ]);
  });

  it('should send keep-alive comments every 10 seconds', () => {
    jest.advanceTimersByTime(10000);
    expect(data).toContain(': keep-alive');

    jest.advanceTimersByTime(10000);
    expect(data.filter((d) => d === ': keep-alive')).toHaveLength(2);
  });

  describe('without fake timers', () => {
    beforeEach(() => {
      jest.useFakeTimers({ doNotFake: ['nextTick'] });
    });

    it('should end the stream when the observable completes', async () => {
      jest.useFakeTimers({ doNotFake: ['nextTick'] });

      const endSpy = jest.fn();
      stream.on('end', endSpy);

      source$.complete();

      await new Promise((resolve) => process.nextTick(resolve));

      expect(endSpy).toHaveBeenCalled();
    });

    it('should end stream when signal is aborted', async () => {
      const endSpy = jest.fn();
      stream.on('end', endSpy);

      // Emit some data
      source$.next({ type: ServerSentEventType.data, data: { initial: 'data' } });

      // Abort the signal
      controller.abort();

      // Emit more data after abort
      source$.next({ type: ServerSentEventType.data, data: { after: 'abort' } });

      await new Promise((resolve) => process.nextTick(resolve));

      expect(endSpy).toHaveBeenCalled();

      // Data after abort should not be received
      expect(data).toEqual([
        `event: data\ndata: ${JSON.stringify({ data: { initial: 'data' } })}\n\n`,
      ]);
    });

    afterEach(() => {
      jest.useFakeTimers();
    });
  });
});
@@ -7,12 +7,51 @@
  * License v3.0 only", or the "Server Side Public License, v 1".
  */

-import { map, Observable } from 'rxjs';
+import { Logger } from '@kbn/logging';
+import {
+  isSSEError,
+  ServerSentErrorEvent,
+  ServerSentEventErrorCode,
+} from '@kbn/sse-utils/src/errors';
+import { ServerSentEvent, ServerSentEventType } from '@kbn/sse-utils/src/events';
+import { catchError, map, Observable, of } from 'rxjs';
 import { PassThrough } from 'stream';
-import { ServerSentEvent } from '@kbn/sse-utils';

-export function observableIntoEventSourceStream(source$: Observable<ServerSentEvent>): PassThrough {
-  const withSerializedEvents$ = source$.pipe(
+export function observableIntoEventSourceStream(
+  source$: Observable<ServerSentEvent>,
+  {
+    logger,
+    signal,
+  }: {
+    logger: Pick<Logger, 'debug' | 'error'>;
+    signal: AbortSignal;
+  }
+) {
+  const withSerializedErrors$ = source$.pipe(
+    catchError((error): Observable<ServerSentErrorEvent> => {
+      if (isSSEError(error)) {
+        logger.error(error);
+        logger.debug(() => JSON.stringify(error));
+        return of({
+          type: ServerSentEventType.error,
+          error: {
+            code: error.code,
+            message: error.message,
+            meta: error.meta,
+          },
+        });
+      }
+
+      logger.error(error);
+
+      return of({
+        type: ServerSentEventType.error,
+        error: {
+          code: ServerSentEventErrorCode.internalError,
+          message: error.message as string,
+        },
+      });
+    }),
     map((event) => {
       const { type, ...rest } = event;
       return `event: ${type}\ndata: ${JSON.stringify(rest)}\n\n`;
@@ -21,18 +60,38 @@ export function observableIntoEventSourceStream(source$: Observable<ServerSentEvent>): PassThrough {

   const stream = new PassThrough();

-  withSerializedEvents$.subscribe({
+  const intervalId = setInterval(() => {
+    // `:` denotes a comment - this is to keep the connection open
+    // it will be ignored by the SSE parser on the client
+    stream.write(': keep-alive');
+  }, 10000);
+
+  const subscription = withSerializedErrors$.subscribe({
     next: (line) => {
       stream.write(line);
     },
     complete: () => {
       stream.end();
+      clearTimeout(intervalId);
     },
     error: (error) => {
-      stream.write(`event: error\ndata: ${JSON.stringify(error)}\n\n`);
+      clearTimeout(intervalId);
+      stream.write(
+        `event:error\ndata: ${JSON.stringify({
+          error: {
+            code: ServerSentEventErrorCode.internalError,
+            message: error.message,
+          },
+        })}\n\n`
+      );
       stream.end();
     },
   });

+  signal.addEventListener('abort', () => {
+    subscription.unsubscribe();
+    stream.end();
+  });
+
   return stream;
 }
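A hedged usage sketch of the new signature (the stub logger and the event literal are illustrative, not taken from the commit):

```ts
import { of } from 'rxjs';
import { ServerSentEventType } from '@kbn/sse-utils/src/events';
import { observableIntoEventSourceStream } from './observable_into_event_source_stream';

const controller = new AbortController();
// Minimal logger satisfying Pick<Logger, 'debug' | 'error'> for the sketch:
const logger = { debug: () => {}, error: (err: unknown) => console.error(err) };

const stream = observableIntoEventSourceStream(
  of({ type: ServerSentEventType.data, data: { hello: 'world' } }),
  { logger, signal: controller.signal }
);

// Each emission arrives framed for EventSource consumers:
// 'event: data\ndata: {"data":{"hello":"world"}}\n\n'
stream.on('data', (chunk) => process.stdout.write(chunk.toString()));

// Aborting (e.g. when the HTTP request closes) unsubscribes and ends the stream.
controller.abort();
```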
@@ -15,5 +15,6 @@
   ],
   "kbn_references": [
     "@kbn/sse-utils",
+    "@kbn/logging",
   ]
 }
@@ -21,7 +21,8 @@ function myRequestHandler(
         data: {
           anyData: {},
         },
-      })
+      }),
+      logger
     ),
   });
 }
@@ -1318,6 +1318,10 @@
     "@kbn/observability-ai-assistant-management-plugin/*": ["x-pack/plugins/observability_solution/observability_ai_assistant_management/*"],
     "@kbn/observability-ai-assistant-plugin": ["x-pack/plugins/observability_solution/observability_ai_assistant"],
     "@kbn/observability-ai-assistant-plugin/*": ["x-pack/plugins/observability_solution/observability_ai_assistant/*"],
+    "@kbn/observability-ai-common": ["x-pack/packages/observability/observability_ai/observability_ai_common"],
+    "@kbn/observability-ai-common/*": ["x-pack/packages/observability/observability_ai/observability_ai_common/*"],
+    "@kbn/observability-ai-server": ["x-pack/packages/observability/observability_ai/observability_ai_server"],
+    "@kbn/observability-ai-server/*": ["x-pack/packages/observability/observability_ai/observability_ai_server/*"],
     "@kbn/observability-alert-details": ["x-pack/packages/observability/alert_details"],
     "@kbn/observability-alert-details/*": ["x-pack/packages/observability/alert_details/*"],
     "@kbn/observability-alerting-rule-utils": ["x-pack/packages/observability/alerting_rule_utils"],
@@ -0,0 +1,15 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

module.exports = {
  preset: '@kbn/test',
  rootDir: '../../../../..',
  roots: [
    '<rootDir>/x-pack/packages/observability/observability_ai/observability_ai_common',
    '<rootDir>/x-pack/packages/observability/observability_ai/observability_ai_server',
  ],
};
@@ -0,0 +1,7 @@
{
  "type": "shared-common",
  "id": "@kbn/observability-ai-common",
  "owner": "@elastic/obs-ai-assistant",
  "group": "observability",
  "visibility": "private"
}
@@ -0,0 +1,6 @@
{
  "name": "@kbn/observability-ai-common",
  "private": true,
  "version": "1.0.0",
  "license": "Elastic License 2.0"
}
@@ -0,0 +1,12 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

export {
  RCA_END_PROCESS_TOOL_NAME,
  RCA_INVESTIGATE_ENTITY_TOOL_NAME,
  RCA_OBSERVE_TOOL_NAME,
} from './tool_names';
@@ -0,0 +1,10 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

export const RCA_OBSERVE_TOOL_NAME = 'observe';
export const RCA_END_PROCESS_TOOL_NAME = 'endProcessAndWriteReport';
export const RCA_INVESTIGATE_ENTITY_TOOL_NAME = 'investigateEntity';
@@ -0,0 +1,20 @@
{
  "extends": "../../../../../tsconfig.base.json",
  "compilerOptions": {
    "outDir": "target/types",
    "types": [
      "jest",
      "node",
      "react"
    ]
  },
  "include": [
    "**/*.ts",
    "**/*.tsx",
  ],
  "exclude": [
    "target/**/*"
  ],
  "kbn_references": [
  ]
}
@@ -0,0 +1,12 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

module.exports = {
  preset: '@kbn/test',
  rootDir: '../../../../..',
  roots: ['<rootDir>/x-pack/packages/observability/observability_ai/observability_ai_server'],
};
@@ -0,0 +1,7 @@
{
  "type": "shared-server",
  "id": "@kbn/observability-ai-server",
  "owner": "@elastic/obs-ai-assistant",
  "group": "observability",
  "visibility": "private"
}
@@ -0,0 +1,6 @@
{
  "name": "@kbn/observability-ai-server",
  "private": true,
  "version": "1.0.0",
  "license": "Elastic License 2.0"
}
@@ -0,0 +1,51 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { from, Observable, of, switchMap } from 'rxjs';
import { RCA_END_PROCESS_TOOL_NAME } from '@kbn/observability-ai-common/root_cause_analysis';
import { AssistantMessage, MessageRole } from '@kbn/inference-common';
import { writeFinalReport } from './tasks/write_final_report';
import { EndProcessToolMessage, RootCauseAnalysisContext } from './types';
import { generateSignificantEventsTimeline } from './tasks/generate_timeline';
import { EMPTY_ASSISTANT_MESSAGE } from './empty_assistant_message';

export function callEndRcaProcessTool({
  rcaContext,
  toolCallId,
}: {
  rcaContext: RootCauseAnalysisContext;
  toolCallId: string;
}): Observable<EndProcessToolMessage | AssistantMessage> {
  return from(
    writeFinalReport({
      rcaContext,
    })
  ).pipe(
    switchMap((report) => {
      return from(
        generateSignificantEventsTimeline({
          rcaContext,
          report,
        }).then((timeline) => {
          return { timeline, report };
        })
      );
    }),
    switchMap(({ report, timeline }) => {
      const toolMessage: EndProcessToolMessage = {
        name: RCA_END_PROCESS_TOOL_NAME,
        role: MessageRole.Tool,
        toolCallId,
        response: {
          report,
          timeline,
        },
      };
      return of(toolMessage, EMPTY_ASSISTANT_MESSAGE);
    })
  );
}
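The tool above chains two async tasks into one observable; a self-contained sketch of that `from(...).pipe(switchMap(...))` pattern, with hypothetical stand-ins for `writeFinalReport` and `generateSignificantEventsTimeline`:

```ts
import { from, of, switchMap } from 'rxjs';

// Hypothetical async tasks standing in for the real report/timeline calls:
const writeReport = async () => 'report text';
const makeTimeline = async (report: string) => [`first event from ${report}`];

from(writeReport())
  .pipe(
    // Each switchMap waits for the previous async step, mirroring the tool above:
    switchMap((report) =>
      from(makeTimeline(report).then((timeline) => ({ report, timeline })))
    ),
    // of(a, b) emits both values in order - the same shape as emitting the tool
    // message followed by the empty assistant message.
    switchMap(({ report, timeline }) => of({ report }, { timeline }))
  )
  .subscribe((message) => console.log(message));
```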
@@ -0,0 +1,80 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { from, Observable, of, switchMap } from 'rxjs';
import { MessageRole } from '@kbn/inference-common';
import { RCA_INVESTIGATE_ENTITY_TOOL_NAME } from '@kbn/observability-ai-common/root_cause_analysis';
import { InvestigateEntityToolMessage, RootCauseAnalysisContext, ToolErrorMessage } from './types';
import { investigateEntity } from './tasks/investigate_entity';
import { formatEntity } from './util/format_entity';

export function callInvestigateEntityTool({
  field,
  value,
  context,
  toolCallId,
  rcaContext,
}: {
  field: string;
  value: string;
  context: string;
  toolCallId: string;
  rcaContext: RootCauseAnalysisContext;
}): Observable<InvestigateEntityToolMessage | ToolErrorMessage> {
  const nextEntity = {
    [field]: value,
  };

  return from(
    investigateEntity({
      rcaContext,
      entity: nextEntity,
      context,
    })
  ).pipe(
    switchMap((entityInvestigation) => {
      if (!entityInvestigation) {
        const entityNotFoundToolMessage: ToolErrorMessage = {
          name: 'error',
          role: MessageRole.Tool,
          response: {
            error: {
              message: `Entity ${formatEntity(nextEntity)} not found, have
              you verified it exists and if the field and value you are using
              are correct?`,
            },
          },
          toolCallId,
        };

        return of(entityNotFoundToolMessage);
      }

      const {
        attachments,
        relatedEntities,
        entity: investigatedEntity,
        summary,
      } = entityInvestigation;
      const toolMessage: InvestigateEntityToolMessage = {
        name: RCA_INVESTIGATE_ENTITY_TOOL_NAME,
        role: MessageRole.Tool as const,
        toolCallId,
        response: {
          entity: investigatedEntity,
          relatedEntities,
          summary,
        },
        data: {
          attachments,
        },
      };

      return of(toolMessage);
    })
  );
}
@@ -0,0 +1,91 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { AssistantMessage, MessageRole } from '@kbn/inference-common';
import {
  RCA_INVESTIGATE_ENTITY_TOOL_NAME,
  RCA_OBSERVE_TOOL_NAME,
} from '@kbn/observability-ai-common/root_cause_analysis';
import { compact, findLast } from 'lodash';
import { from, Observable, of, switchMap } from 'rxjs';
import { observeInvestigationResults } from './tasks/observe_investigation_results';
import {
  InvestigateEntityToolMessage,
  ObservationToolMessage,
  RootCauseAnalysisContext,
  RootCauseAnalysisEvent,
} from './types';

export function callObserveTool({
  rcaContext,
  toolCallId,
}: {
  rcaContext: RootCauseAnalysisContext;
  toolCallId: string;
}): Observable<ObservationToolMessage> {
  const { events } = rcaContext;

  const lastAssistantMessage = findLast(
    events.slice(0, -1),
    (event): event is Extract<RootCauseAnalysisEvent, AssistantMessage> =>
      event.role === MessageRole.Assistant
  );

  const toolMessagesByToolCallId = Object.fromEntries(
    compact(
      events.map((message) =>
        'toolCallId' in message &&
        (message.name === RCA_INVESTIGATE_ENTITY_TOOL_NAME || message.name === 'error')
          ? [message.toolCallId, message]
          : undefined
      )
    )
  );

  const investigationToolMessages =
    lastAssistantMessage && lastAssistantMessage.toolCalls
      ? compact(
          lastAssistantMessage.toolCalls.map((investigateEntityToolCall) => {
            if (investigateEntityToolCall.function.name !== RCA_INVESTIGATE_ENTITY_TOOL_NAME) {
              return undefined;
            }
            return {
              toolCall: investigateEntityToolCall,
              toolResponse: toolMessagesByToolCallId[investigateEntityToolCall.toolCallId],
            };
          })
        )
      : [];

  const investigations = investigationToolMessages
    .map((toolMessage) => toolMessage.toolResponse)
    .filter(
      (toolResponse): toolResponse is InvestigateEntityToolMessage =>
        toolResponse.name === RCA_INVESTIGATE_ENTITY_TOOL_NAME
    )
    .map((toolResponse) => ({ ...toolResponse.data, ...toolResponse.response }));

  return from(
    observeInvestigationResults({
      rcaContext,
      investigations,
    })
  ).pipe(
    switchMap((summary) => {
      const observationToolMessage: ObservationToolMessage = {
        name: RCA_OBSERVE_TOOL_NAME,
        response: {
          content: summary.content,
        },
        data: summary,
        role: MessageRole.Tool,
        toolCallId,
      };
      return of(observationToolMessage);
    })
  );
}
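The `Object.fromEntries(compact(...))` lookup above pairs each tool call with its response message; a minimal sketch of the pattern with a simplified message shape (names hypothetical):

```ts
import { compact } from 'lodash';

interface ToolMsg {
  toolCallId: string;
  name: string;
}

const events: ToolMsg[] = [
  { toolCallId: 'a', name: 'investigateEntity' },
  { toolCallId: 'b', name: 'observe' },
];

// map() yields [id, message] entries for matching messages and undefined for
// the rest; compact() drops the undefined entries before building the record.
const byToolCallId = Object.fromEntries(
  compact(
    events.map((message) =>
      message.name === 'investigateEntity'
        ? ([message.toolCallId, message] as const)
        : undefined
    )
  )
);
// byToolCallId.a is the investigateEntity message; 'b' is absent.
```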
@@ -0,0 +1,15 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { AssistantMessage, MessageRole } from '@kbn/inference-common';
import { RootCauseAnalysisEvent } from './types';

export const EMPTY_ASSISTANT_MESSAGE: Extract<RootCauseAnalysisEvent, AssistantMessage> = {
  content: '',
  role: MessageRole.Assistant,
  toolCalls: [],
};
@@ -0,0 +1,20 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

export type {
  RootCauseAnalysisEvent,
  InvestigateEntityToolMessage,
  EndProcessToolMessage,
  ObservationToolMessage,
  RootCauseAnalysisToolMessage,
  ToolErrorMessage,
  RootCauseAnalysisToolRequest,
} from './types';
export type { SignificantEventsTimeline, SignificantEvent } from './tasks/generate_timeline';
export type { EntityInvestigation } from './tasks/investigate_entity';

export { runRootCauseAnalysis } from './run_root_cause_analysis';
@@ -0,0 +1,345 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

export const RCA_SYSTEM_PROMPT_BASE = `You are a helpful assistant for Elastic Observability.
You are a distinguished SRE, who has an established career, working in both
small shops and FAANG-level companies. You have worked with Elasticsearch
since the beginning and expertly use it in your analysis of incidents.

You use an evidence-based strategy to determine the root cause of
an incident. You thoroughly analyze Observability data. You use your
understanding of different architectures like microservices, monoliths,
event-driven systems, and environments like Kubernetes to discover
patterns and correlations in the data ingested into the user's system.

Your sizable experience with monitoring software systems has taught
you how to investigate issues and correlate symptoms of the investigated
service with its dependencies.

## Capabilities

You are highly skilled at inspecting logs, traces, alerts, and SLOs to uncover
the root cause of incidents, with a special emphasis on detecting log patterns
that reveal system behavior. You can identify related entities, such as upstream
services or the specific pod a service is running on, by searching through logs
and traces for relationships using metadata like IP addresses, session IDs, or
distributed tracing data. While you can analyze alerts and SLO-derived metrics,
you do not directly analyze other system metrics, inspect files, or execute
commands that modify the system.

## Non-capabilities

You lack the capabilities to analyze metrics or connect to external systems.`;

export const RCA_PROMPT_ENTITIES = `# Entities

In an Observability system, entities are distinct components or resources within
the infrastructure, each representing points of interest for monitoring and
troubleshooting. These entities form the backbone of log-based analysis and
allow teams to track behavior, detect anomalies, and investigate issues across
different layers of the system. Here’s a breakdown of common entities in
observability:

1. Services: Core units of functionality in an application ecosystem,
representing individual processes or applications (e.g., user-authentication,
payment processing). Services typically expose APIs or endpoints, and logs from
these entities often capture requests, responses, and error events, which are
critical for understanding application behavior.

2. Kubernetes (K8s) Entities:
- Pods: The smallest deployable units in Kubernetes, usually containing one
or more containers. Logs from pods provide insight into container operations,
errors, and application states.
- Namespaces: Logical groupings within a cluster for organizing and isolating
resources, helping in filtering logs by domain or responsibility.
- Nodes: Worker machines (either physical or virtual) where pods run. Node
logs often cover hardware resource events, errors, and other system-level events
relevant to pod health and performance.
- Deployments and ReplicaSets: Define and manage the desired state of pod
replication and rolling updates. Logs from these components can reveal changes
in application versions, scaling events, and configuration updates.

3. Virtual Machines (VMs): Virtualized computing resources that generate
operating system-level logs capturing events such as application crashes,
network issues, and OS-related errors.

4. Applications: Software systems or packages running across the infrastructure,
which may encompass multiple services. Logs from applications track user flows,
application states, and error messages, providing context for user interactions
and system events.

5. Serverless Functions (e.g., AWS Lambda): Code executions triggered by
specific events. Logs from serverless functions capture invocation details,
execution paths, and error traces, which are useful for understanding specific
function behaviors and pinpointing execution anomalies.

6. Databases and Data Stores: Includes SQL/NoSQL databases, caches, and storage
solutions. Logs from these entities cover query executions, connection issues,
and transaction errors, essential for tracking data layer issues.

7. Containers: Portable environments running individual services or processes.
Container logs capture application and system events within the containerized
environment, helping track process-level errors and status changes.

8. Load Balancers and API Gateways: Components responsible for managing and
routing traffic. Logs from these entities include request paths, status codes,
and errors encountered, which can indicate connectivity issues or
misconfigurations.

9. Networking Components: Entities like virtual private clouds (VPCs),
firewalls, VPNs, and network interfaces. Logs from these components track
traffic flows, connectivity issues, and security events, crucial for identifying
network-related anomalies.

10. Clusters and Regions: Groupings of infrastructure either physically or
logically, such as across data centers or cloud regions. Cluster and region logs
help capture high-level events and error messages, useful for understanding
system-wide issues and region-specific disruptions.

Each of these entities is typically identified by fields such as
\`service.name\`, \`kubernetes.pod.name\`, \`container.id\`, or similar fields
in log records. Observability systems use these identifiers to connect entities,
creating a map of relationships and dependencies that helps teams diagnose
issues, understand cross-entity impacts, and uncover root causes in distributed
architectures.`;

export const RCA_PROMPT_DEPENDENCIES = `## Understanding the Flow: Upstream vs. Downstream

- Upstream dependencies: These are the services that your service
depends on. They supply data, perform tasks, or provide resources that
your service consumes.
- Downstream dependencies: These are the services that depend on your
service. They consume the data or resources your service generates.

When diagnosing issues, distinguishing the direction of dependency can
clarify whether a problem originates from your service’s reliance on an
external input or whether your service is causing issues for other systems.

---

## When to Investigate Upstream Dependencies

Upstream issues typically occur when your service is failing due to problems
with the responses it receives from external systems.

1. Timeouts and Latency
- Symptoms: Slow response times, retries, or timeouts.
- Errors: HTTP 504, retrying connection, exceeded timeout threshold.
- Focus: Check the performance and availability of upstream services
(e.g., APIs, databases) and network latency.

2. Data Integrity Issues
- Symptoms: Inconsistent or corrupted data.
- Errors: unexpected data format, deserialization errors.
- Focus: Verify data received from upstream services, and investigate
schema or data format changes.

3. Connection Failures
- Symptoms: Your service cannot connect to upstream services.
- Errors: DNS lookup failed, connection refused, socket timeout.
- Focus: Check upstream service health, DNS, and networking components.

4. Authentication/Authorization Failures
- Symptoms: Failed access to upstream resources.
- Errors: 401 Unauthorized, 403 Forbidden, token issues.
- Focus: Validate credentials or tokens and investigate upstream access
policies.

---

## When to Investigate Downstream Dependencies

Downstream issues occur when your service is functioning but its outputs cause
failures in other services that depend on it.

1. Data or API Response Issues
- Symptoms: Downstream services receive bad or invalid data.
- Errors: data type mismatch, invalid JSON format.
- Focus: Ensure your service is returning correct data and check for API
changes.

2. Rate-Limiting and Resource Exhaustion
- Symptoms: Downstream services are overwhelmed.
- Errors: 429 Too Many Requests, throttling or resource exhaustion.
- Focus: Check your service’s request rates and resource usage (e.g., memory, CPU).

3. Unexpected Behavior or Regression
- Symptoms: Downstream failures after a recent deployment.
- Errors: New downstream errors after your service changes.
- Focus: Review recent updates, API contracts, or integration points.

4. Eventual Consistency or Queue Backlogs
- Symptoms: Delayed processing in downstream systems.
- Errors: message queue full, backlog warnings.
- Focus: Check event production rates and queue statuses in downstream services.`;

export const RCA_PROMPT_CHANGES = `## Reasoning about Correlating Changes in Incident Investigations

In a root cause analysis, understanding the types and timing of changes is key
to linking symptoms with underlying causes. Changes can broadly be classified
into **symptomatic changes** (indicators of system issues like elevated error
rates or degraded throughput) and **system changes** (events that modify system
configuration or structure, such as scale-downs, new version rollouts, or
significant configuration adjustments). By correlating these changes, we can
assess whether observed symptoms are likely related to specific system
modifications.

### Identifying Correlations Between Symptomatic and System Changes

When investigating a sudden issue—such as a 5x increase in latency—it’s
essential to evaluate both the **timing** and **nature** of associated changes
in upstream dependencies, resource utilization, and configuration events. For
instance:

- Consistent Symptomatic Behavior: If an upstream dependency exhibits a
similar, sustained latency spike around the same time and shows log entries
indicating CPU throttling, this would suggest a correlated, persistent issue
that may directly impact the observed symptom. A scale-down event preceding the
latency increase might indicate that reduced resources are stressing the
dependency.

- Transient vs. Persistent Issues: Another upstream dependency that
experiences a brief latency increase but recovers quickly is less likely
related. Short-lived changes that self-correct without intervention typically
have different root causes or may be unrelated noise.

### Types of Changes to Consider in Correlation

1. Log Pattern Changes: A shift in log patterns, especially around error
levels, provides significant insight. If there’s an increase in critical or
warning log patterns for a dependency during the latency spike, it could
indicate that the issue stems from this entity. Compare these log patterns to
past behavior to assess whether they represent an anomaly that might warrant
further investigation.

2. Event-Driven System Changes:
- Scale Events: Scale-up or scale-down events can directly impact
performance. If a latency increase aligns with a scale-down, it may suggest that
resource reduction is straining the system.
- Release or Deployment Events: A new version rollout or config change is
a frequent source of correlated issues. Compare the timing of the latency
increase to the deployment to see if the change directly impacts the system.
Correlate with alerts or SLO breaches on endpoints to understand the immediate
effects of the release.

3. SLO and Alert-Based Changes: SLO breaches and alerts can provide concrete
timestamps for when symptoms begin. For instance, a breach on error rates for a
specific service endpoint following a dependency’s scale-down event suggests a
possible causal link. An alert indicating sustained latency increase in a
dependency that remains unresolved points to a high-priority area for deeper
investigation.

4. Dependency Health and Behavior:
- Related vs. Unrelated Dependencies: Similar to the latency example,
observe if multiple dependencies experience symptomatic changes simultaneously.
Related dependencies should show consistent, similar issues, while unrelated
dependencies may exhibit brief, unrelated spikes. Persistent issues across key
dependencies likely indicate a systemic cause, while isolated changes are less
likely to be relevant.

### Examples of Reasoning Through Changes

Consider these scenarios:
- Increase in Error Rates and a Recent Deployment: Suppose error rates for
an endpoint increase sharply post-deployment. If related logs show new error
patterns, this aligns the symptom with a deployment change. Investigate specific
changes in the deployment (e.g., code changes or resource allocation).
- Throughput Decrease and Scaling Events: If throughput dips shortly after a
scale-down event, it might suggest resource constraints. Analyze CPU or memory
throttling logs from this period in upstream dependencies to confirm.
- Cross-Service Latency Spikes: If multiple services along a call path
experience latency spikes, with CPU throttling logs, this suggests a resource
bottleneck. Trace logs and alerts related to autoscaling decisions may provide
insights into whether the system configuration caused cascading delays.

By carefully mapping these changes and analyzing their timing, you can
distinguish between causally related events and incidental changes, allowing for
a targeted and effective investigation.`;

export const RCA_PROMPT_CHANGE_POINTS = `## Change points

Change points can be defined as the following type:

- \`dip\`: a significant dip occurs at this change point
- \`distribution_change\`: the overall distribution of the values has changed
significantly
- \`non_stationary\`: there is no change point, but the values are not from a
stationary distribution
- \`spike\`: a significant spike occurs at this point
- \`stationary\`: no change point found
- \`step_change\`: the change indicates a statistically significant step up or
down in value distribution
- \`trend_change\`: there is an overall trend change occurring at this point

For \`spike\` and \`dip\`, this means: a short-lived spike or dip that then again
stabilizes. For persisted changes, you'd see a \`step_change\` (if the values
before and after the change point are stable), or a \`trend_change\` when the
values show an upward or downward trend after the change.`;

export const RCA_PROMPT_SIGNIFICANT_EVENTS = `## Significant events

Generate a timeline of significant events. These events should capture
significant observed changes in the system that can be extracted from the
analyzed data. This timeline is absolutely critical to the investigation,
and close attention has to be paid to the data, and the instructions.

The timeline should focus on key events as captured in log patterns, including
both notable changes and unusual/critical messages. This data-driven timeline
should help establish a chain of causality, pinpointing when anomalies began,
what system behaviors were observed, and how these patterns relate to the overall incident.

- Use ISO timestamps to ensure precision and clarity.
- Include alerts that are part of the investigation. For these, use the start
time of the alert, and mention critical information about the alert, such as
reason and grouping fields.
- Focus on log entries that signal significant system behavior (e.g., errors,
retries, anomalies).
- Highlight critical log messages or changes in patterns that may correlate
with the issue.
- Include notable anomalies, such as spikes in error rates, unexpected system
responses, or any log entries suggesting failure or degradation.

Do not include:
- Events that are indicative of normal operations.
- Events that are unlikely to be related to the investigated issue.

Key Elements to Include:

- Log Patterns: Capture log messages that show unusual events or
abnormalities such as error codes, failed retries, or changes in log frequency.
- Timestamps: Ensure every entry in the timeline is time-stamped
with an accurate ISO 8601 timestamp.
- Event Description: Provide a clear, concise, and objective description of
what was observed in the logs.
- Corroborating Data: Link log anomalies to other relevant data points such
as traffic shifts, request patterns, or upstream/downstream service impacts.`;

export const RCA_PROMPT_TIMELINE_GUIDE = `
The timeline should focus on key events as
captured in log patterns, including both notable changes and unusual/critical
messages. This data-driven timeline should help establish a chain of causality,
pinpointing when anomalies began, what system behaviors were observed, and how
these patterns relate to the overall incident.

- Use ISO timestamps to ensure precision and clarity.
- Focus on log entries that signal significant system behavior (e.g.,
errors, retries, anomalies).
- Highlight critical log messages or changes in patterns that may correlate
with the issue.
- Include notable anomalies, such as spikes in error rates, unexpected
system responses, or any log entries suggesting failure or degradation.

Key Elements to Include:

Log Patterns: Capture log messages that show unusual events or
abnormalities such as error codes, failed retries, or changes in log frequency.
Timestamps: Ensure every entry in the timeline is time-stamped
with an accurate ISO 8601 timestamp.
Event Description: Provide a clear, concise description of what was
observed in the logs.
Corroborating Data: Link log anomalies to other relevant data points such
as traffic shifts, request patterns, or upstream/downstream service impacts.`;
@ -0,0 +1,305 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import { RulesClient } from '@kbn/alerting-plugin/server';
|
||||
import { calculateAuto } from '@kbn/calculate-auto';
|
||||
import { MessageRole, AssistantMessage, ToolMessage, ToolChoiceType } from '@kbn/inference-common';
|
||||
import { InferenceClient } from '@kbn/inference-plugin/server';
|
||||
import { Logger } from '@kbn/logging';
|
||||
import { AlertsClient } from '@kbn/rule-registry-plugin/server';
|
||||
import { findLast, pick } from 'lodash';
|
||||
import moment from 'moment';
|
||||
import { catchError, filter, from, map, mergeMap, Observable, of, switchMap } from 'rxjs';
|
||||
import { ObservabilityAIAssistantClient } from '@kbn/observability-ai-assistant-plugin/server';
|
||||
import { ObservabilityElasticsearchClient } from '@kbn/observability-utils-server/es/client/create_observability_es_client';
|
||||
import {
|
||||
RCA_END_PROCESS_TOOL_NAME,
|
||||
RCA_INVESTIGATE_ENTITY_TOOL_NAME,
|
||||
RCA_OBSERVE_TOOL_NAME,
|
||||
} from '@kbn/observability-ai-common/root_cause_analysis';
|
||||
import { callEndRcaProcessTool } from './call_end_rca_process_tool';
|
||||
import { callInvestigateEntityTool } from './call_investigate_entity_tool';
|
||||
import { callObserveTool } from './call_observe_tool';
|
||||
import { RCA_PROMPT_CHANGES, RCA_PROMPT_ENTITIES, RCA_SYSTEM_PROMPT_BASE } from './prompts';
|
||||
import { RCA_TOOLS } from './tools';
|
||||
import {
|
||||
EndProcessToolMessage,
|
||||
InvestigateEntityToolMessage,
|
||||
ObservationToolMessage,
|
||||
RootCauseAnalysisContext,
|
||||
RootCauseAnalysisEvent,
|
||||
ToolErrorMessage,
|
||||
} from './types';
|
||||
import { callTools } from './util/call_tools';
|
||||
import { formatEntity } from './util/format_entity';
|
||||
import { validateInvestigateEntityToolCalls } from './util/validate_investigate_entity_tool_call';
|
||||
|
||||
const SYSTEM_PROMPT_WITH_OBSERVE_INSTRUCTIONS = `${RCA_SYSTEM_PROMPT_BASE}
|
||||
|
||||
Your next step is to request an observation from another agent based
|
||||
on the initial context or the results of previous investigations.`;
|
||||
|
||||
const SYSTEM_PROMPT_WITH_DECISION_INSTRUCTIONS = `${RCA_SYSTEM_PROMPT_BASE}
|
||||
|
||||
${RCA_PROMPT_ENTITIES}
|
||||
|
||||
${RCA_PROMPT_CHANGES}
|
||||
|
||||
To determine whether to end the process or continue analyzing another entity,
|
||||
follow the advice from the previous observation, and these tips:
|
||||
|
||||
Continuing the process:
|
||||
- Do not investigate an entity twice. This will result in a failure.
|
||||
- Logs, traces, or observability data that suggest upstream or downstream
|
||||
issues (such as connection failures, timeouts, or authentication errors)
|
||||
indicate further investigation is required.
|
||||
|
||||
Ending the process:
|
||||
- No further entities to investigate: If there are no unexplored upstream or
|
||||
downstream dependencies, and all related entities have been investigated without
|
||||
discovering new anomalies, it may be appropriate to end the process.
|
||||
- If all investigated entities (e.g., services, hosts, containers) are
|
||||
functioning normally, with no relevant issues found, and there are no signs of
|
||||
dependencies being affected, you may consider ending the process.
|
||||
- Avoid concluding the investigation based solely on symptoms or the absence
|
||||
of immediate errors in the data. Unless a system change has been connected to
|
||||
the incident, it is important to continue investigating dependencies to ensure
|
||||
the root cause has been accurately identified.`;

export function runRootCauseAnalysis({
  serviceName,
  start: requestedStart,
  end: requestedEnd,
  esClient,
  alertsClient,
  rulesClient,
  observabilityAIAssistantClient,
  spaceId,
  indices,
  connectorId,
  inferenceClient,
  context: initialContext,
  logger: incomingLogger,
  prevEvents,
}: {
  context: string;
  serviceName: string;
  logger: Logger;
  inferenceClient: InferenceClient;
  start: number;
  end: number;
  alertsClient: AlertsClient;
  rulesClient: RulesClient;
  esClient: ObservabilityElasticsearchClient;
  observabilityAIAssistantClient: ObservabilityAIAssistantClient;
  indices: {
    logs: string[];
    traces: string[];
    sloSummaries: string[];
  };
  connectorId: string;
  spaceId: string;
  prevEvents?: RootCauseAnalysisEvent[];
}): Observable<RootCauseAnalysisEvent> {
  const logger = incomingLogger.get('rca');

  const entity = { 'service.name': serviceName };

  const bucketSize = calculateAuto
    .atLeast(30, moment.duration(requestedEnd - requestedStart))!
    .asMilliseconds();

  const start = Math.floor(requestedStart / bucketSize) * bucketSize;
  const end = Math.floor(requestedEnd / bucketSize) * bucketSize;

  const initialMessage = {
    role: MessageRole.User as const,
    content: `Investigate the health status of ${formatEntity(entity)}.

The context given for this investigation is:

${initialContext}`,
  };

  const nextEvents = [initialMessage, ...(prevEvents ?? [])];

  const initialRcaContext: RootCauseAnalysisContext = {
    connectorId,
    start,
    end,
    esClient,
    events: nextEvents,
    indices,
    inferenceClient,
    initialContext,
    alertsClient,
    observabilityAIAssistantClient,
    logger,
    rulesClient,
    spaceId,
    tokenLimit: 32_000,
  };

  const investigationTimeRangePrompt = `## Time range

The time range of the investigation is ${new Date(start).toISOString()} until ${new Date(
    end
  ).toISOString()}`;

  initialContext = `${initialContext}

${investigationTimeRangePrompt}
`;

  const next$ = callTools(
    {
      system: RCA_SYSTEM_PROMPT_BASE,
      connectorId,
      inferenceClient,
      messages: nextEvents,
      logger,
    },
    ({ messages }) => {
      const lastSuccessfulToolResponse = findLast(
        messages,
        (message) => message.role === MessageRole.Tool && message.name !== 'error'
      ) as Exclude<ToolMessage, ToolErrorMessage> | undefined;

      const shouldWriteObservationNext =
        !lastSuccessfulToolResponse || lastSuccessfulToolResponse.name !== RCA_OBSERVE_TOOL_NAME;

      const nextTools = shouldWriteObservationNext
        ? pick(RCA_TOOLS, RCA_OBSERVE_TOOL_NAME)
        : pick(RCA_TOOLS, RCA_END_PROCESS_TOOL_NAME, RCA_INVESTIGATE_ENTITY_TOOL_NAME);

      const nextSystem = shouldWriteObservationNext
        ? SYSTEM_PROMPT_WITH_OBSERVE_INSTRUCTIONS
        : SYSTEM_PROMPT_WITH_DECISION_INSTRUCTIONS;

      return {
        messages,
        system: `${nextSystem}

${investigationTimeRangePrompt}`,
        tools: nextTools,
        toolChoice: shouldWriteObservationNext
          ? { function: RCA_OBSERVE_TOOL_NAME }
          : ToolChoiceType.required,
      };
    },
    ({
      toolCalls,
      messages,
    }): Observable<
      | ObservationToolMessage
      | ToolErrorMessage
      | InvestigateEntityToolMessage
      | EndProcessToolMessage
      | AssistantMessage
    > => {
      const nextRcaContext = {
        ...initialRcaContext,
        events: messages as RootCauseAnalysisEvent[],
      };

      return of(undefined).pipe(
        switchMap(() => {
          return from(
            validateInvestigateEntityToolCalls({ rcaContext: nextRcaContext, toolCalls })
          );
        }),
        switchMap((errors) => {
          if (errors.length) {
            return of(
              ...toolCalls.map((toolCall) => {
                const toolCallErrorMessage: ToolErrorMessage = {
                  role: MessageRole.Tool,
                  name: 'error',
                  response: {
                    error: {
                      message: `Some ${RCA_INVESTIGATE_ENTITY_TOOL_NAME} calls were not valid:
${errors.map((error) => `- ${error}`).join('\n')}`,
                    },
                  },
                  toolCallId: toolCall.toolCallId,
                };
                return toolCallErrorMessage;
              })
            );
          }
          return of(...toolCalls).pipe(
            mergeMap((toolCall) => {
              function executeToolCall(): Observable<
                | EndProcessToolMessage
                | InvestigateEntityToolMessage
                | ObservationToolMessage
                | ToolErrorMessage
                | AssistantMessage
              > {
                switch (toolCall.function.name) {
                  case RCA_END_PROCESS_TOOL_NAME:
                    return callEndRcaProcessTool({
                      rcaContext: nextRcaContext,
                      toolCallId: toolCall.toolCallId,
                    });

                  case RCA_INVESTIGATE_ENTITY_TOOL_NAME:
                    return callInvestigateEntityTool({
                      context: toolCall.function.arguments.context,
                      field: toolCall.function.arguments.entity.field,
                      value: toolCall.function.arguments.entity.value,
                      rcaContext: nextRcaContext,
                      toolCallId: toolCall.toolCallId,
                    });

                  case RCA_OBSERVE_TOOL_NAME:
                    return callObserveTool({
                      rcaContext: nextRcaContext,
                      toolCallId: toolCall.toolCallId,
                    });
                }
              }

              return executeToolCall().pipe(
                catchError((error) => {
                  logger.error(`Failed executing task: ${error.message}`);
                  logger.error(error);
                  const toolErrorMessage: ToolErrorMessage = {
                    name: 'error',
                    role: MessageRole.Tool,
                    response: {
                      error: {
                        ...('toJSON' in error && typeof error.toJSON === 'function'
                          ? error.toJSON()
                          : {}),
                        message: error.message,
                      },
                    },
                    toolCallId: toolCall.toolCallId,
                  };
                  return of(toolErrorMessage);
                })
              );
            }, 3)
          );
        })
      );
    }
  );

  return next$.pipe(
    filter((event) =>
      Boolean(event.role !== MessageRole.Assistant || event.content || event.toolCalls?.length)
    ),
    map((event) => {
      if (event.role === MessageRole.Assistant) {
        return event as Extract<RootCauseAnalysisEvent, AssistantMessage>;
      }
      return event;
    })
  );
}
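Note: a minimal sketch of how a caller might drain the Observable returned above. The helper name and the reduced event shape are assumptions for illustration, not part of this change:

```ts
import { lastValueFrom, toArray } from 'rxjs';
import type { Observable } from 'rxjs';

// Hypothetical consumer: collect every emitted RCA event once the stream completes.
async function collectRcaEvents<T extends { role: string; content?: string }>(
  events$: Observable<T>
): Promise<T[]> {
  const events = await lastValueFrom(events$.pipe(toArray()));
  events.forEach((event) => {
    // Tool responses carry no `content`; assistant messages usually do.
    console.log(event.role, event.content ?? '(tool response)');
  });
  return events;
}
```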
@@ -0,0 +1,402 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */
import { getEntityKuery } from '@kbn/observability-utils-common/entities/get_entity_kuery';
import { formatValueForKql } from '@kbn/observability-utils-common/es/format_value_for_kql';
import type { TruncatedDocumentAnalysis } from '@kbn/observability-utils-common/llm/log_analysis/document_analysis';
import { ShortIdTable } from '@kbn/observability-utils-common/llm/short_id_table';
import {
  P_VALUE_SIGNIFICANCE_HIGH,
  P_VALUE_SIGNIFICANCE_MEDIUM,
} from '@kbn/observability-utils-common/ml/p_value_to_label';
import {
  FieldPatternResultWithChanges,
  getLogPatterns,
} from '@kbn/observability-utils-server/entities/get_log_patterns';
import { castArray, compact, groupBy, orderBy } from 'lodash';
import { RCA_PROMPT_CHANGES, RCA_PROMPT_ENTITIES } from '../../prompts';
import { RootCauseAnalysisContext } from '../../types';
import { formatEntity } from '../../util/format_entity';
import { serializeKnowledgeBaseEntries } from '../../util/serialize_knowledge_base_entries';
import { ScoredKnowledgeBaseEntry } from '../get_knowledge_base_entries';

type LogPatternRelevance = 'normal' | 'unusual' | 'warning' | 'critical';

export type AnalyzedLogPattern = FieldPatternResultWithChanges & {
  relevance: LogPatternRelevance;
  interesting: boolean;
};

export interface AnalyzeLogPatternOutput {
  ownPatterns: AnalyzedLogPattern[];
  patternsFromOtherEntities: AnalyzedLogPattern[];
}

const normalDescription = `normal operations, such as access logs`;
const unusualDescription = `something unusual and/or rare,
such as startup or shutdown messages or
other rare events`;
const warningDescription = `something being in an unexpected state,
such as error messages, rate limiting or disk usage warnings`;
const criticalDescription = `something being in a critical state,
such as startup failure messages, out-of-memory errors or crashloopbackoff
events`;

interface LogPatternCutOff {
  significance?: 'high' | 'medium' | 'low';
  pValue?: number;
}

export async function analyzeLogPatterns({
  entity,
  allAnalysis,
  system,
  rcaContext: { logger: parentLogger, inferenceClient, connectorId, esClient, start, end, indices },
  cutoff,
  kbEntries,
}: {
  entity: Record<string, string>;
  allAnalysis: Array<{ index: string | string[]; analysis: TruncatedDocumentAnalysis }>;
  system: string;
  cutoff?: LogPatternCutOff;
  kbEntries: ScoredKnowledgeBaseEntry[];
  rcaContext: Pick<
    RootCauseAnalysisContext,
    'indices' | 'logger' | 'inferenceClient' | 'connectorId' | 'esClient' | 'start' | 'end'
  >;
}): Promise<AnalyzeLogPatternOutput> {
  const kuery = getEntityKuery(entity);

  const logger = parentLogger.get('analyzeLogPatterns');

  const fields = ['message', 'error.exception.message'];

  logger.debug(() => `Analyzing log patterns for ${JSON.stringify(entity)}`);

  const systemPrompt = `You are a helpful assistant for Elastic Observability.
You are an expert in analyzing log messages for software
systems, and you use your extensive experience as an SRE
to thoroughly analyze log patterns for things that require
attention from the user.

${RCA_PROMPT_CHANGES}

${RCA_PROMPT_ENTITIES}

## Entity

The following entity is being analyzed:

${formatEntity(entity)}

${serializeKnowledgeBaseEntries(kbEntries)}

### Entity analysis

${allAnalysis.map(({ index: analyzedIndex, analysis }) => {
  return `#### Indices: ${castArray(analyzedIndex).join(',')}

${JSON.stringify(analysis)}`;
})}

${system}`;

  const kueryForOtherEntities = `NOT (${kuery}) AND ${Object.values(entity)
    .map(
      (val) =>
        `(${fields.map((field) => `(${[field, formatValueForKql(val)].join(':')})`).join(' OR ')})`
    )
    .join(' AND ')}`;

  const [logPatternsFromEntity, logPatternsFromElsewhere] = await Promise.all([
    getLogPatterns({
      esClient,
      index: [...indices.logs, ...indices.traces],
      start,
      end,
      kuery,
      includeChanges: true,
      fields,
      metadata: [],
    }),
    getLogPatterns({
      esClient,
      index: [...indices.logs],
      start,
      end,
      kuery: kueryForOtherEntities,
      metadata: Object.keys(entity),
      includeChanges: true,
      fields,
    }),
  ]);
  const patternIdLookupTable = new ShortIdTable();

  logger.debug(
    () =>
      `Found ${logPatternsFromEntity.length} own log patterns and ${logPatternsFromElsewhere.length} from others`
  );

  logger.trace(
    () =>
      `Found log patterns${JSON.stringify({
        entity,
        logPatternsFromEntity,
        logPatternsFromElsewhere,
      })}`
  );

  const patternsWithIds = [...logPatternsFromEntity, ...logPatternsFromElsewhere].map((pattern) => {
    return {
      ...pattern,
      shortId: patternIdLookupTable.take(pattern.regex),
    };
  });

  const patternsByRegex = new Map(patternsWithIds.map((pattern) => [pattern.regex, pattern]));

  const serializedOwnEntity = formatEntity(entity);

  const [ownPatterns, patternsFromOtherEntities] = await Promise.all([
    logPatternsFromEntity.length ? categorizeOwnPatterns() : [],
    logPatternsFromElsewhere.length ? selectRelevantPatternsFromOtherEntities() : [],
  ]);

  logger.trace(
    () =>
      `Classified log patterns ${JSON.stringify([entity, ownPatterns, patternsFromOtherEntities])}`
  );

  const allPatterns = [...ownPatterns, ...patternsFromOtherEntities];

  const sortedByPValueAsc = orderBy(
    allPatterns.filter((pattern) => pattern.change && pattern.change.p_value),
    (pattern) => {
      return pattern.change.p_value;
    },
    'asc'
  );

  const pValueCutOff = getPValueCutoff({ cutoff, max: sortedByPValueAsc[0]?.change.p_value });

  return {
    ownPatterns: ownPatterns.map((pattern) => ({
      ...pattern,
      interesting: isInterestingPattern(pattern, pValueCutOff),
    })),
    patternsFromOtherEntities: patternsFromOtherEntities.map((pattern) => ({
      ...pattern,
      interesting: isInterestingPattern(pattern, pValueCutOff),
    })),
  };

  function categorizeOwnPatterns() {
    return inferenceClient
      .output({
        id: 'analyze_log_patterns',
        connectorId,
        system: systemPrompt,
        input: `Based on the following log patterns from
${formatEntity(entity)}, group these patterns into
the following categories:

- normal (patterns that are indicative of ${normalDescription})
- unusual (patterns that are indicative of ${unusualDescription})
- warning (patterns that are indicative of ${warningDescription})
- critical (patterns that are indicative of ${criticalDescription})

## Log patterns:

${preparePatternsForLlm(logPatternsFromEntity)}
`,
        schema: {
          type: 'object',
          properties: {
            categories: {
              type: 'array',
              items: {
                type: 'object',
                properties: {
                  relevance: {
                    type: 'string',
                    enum: ['normal', 'unusual', 'warning', 'critical'],
                  },
                  shortIds: {
                    type: 'array',
                    description:
                      'The pattern IDs you want to group here. Use the pattern short ID.',
                    items: {
                      type: 'string',
                    },
                  },
                },
                required: ['relevance', 'shortIds'],
              },
            },
          },
          required: ['categories'],
        } as const,
      })
      .then((outputEvent) => {
        return outputEvent.output.categories.flatMap((category) => {
          return mapIdsBackToPatterns(category.shortIds).map((pattern) => {
            return {
              ...pattern,
              relevance: category.relevance,
            };
          });
        });
      });
  }

  function selectRelevantPatternsFromOtherEntities() {
    return inferenceClient
      .output({
        id: 'select_relevant_patterns_from_other_entities',
        connectorId,
        system: systemPrompt,
        input: `Based on the following log patterns that
are NOT from ${serializedOwnEntity}, group these
patterns into the following categories:

- irrelevant (patterns that are not relevant for
${serializedOwnEntity})
- normal (patterns that are relevant for
${serializedOwnEntity} and are indicative of ${normalDescription})
- unusual (patterns that are relevant for
${serializedOwnEntity} and are indicative of ${unusualDescription})
- warning (patterns that are relevant for
${serializedOwnEntity} and are indicative of ${warningDescription})
- critical (patterns that are relevant for
${serializedOwnEntity} and are indicative of ${criticalDescription})

Relevant patterns are messages that mention the
investigated entity, or things that are indicative
of critical failures or changes in the entity
that owns the log pattern.

## Log patterns:

${preparePatternsForLlm(logPatternsFromElsewhere)}
`,
        schema: {
          type: 'object',
          properties: {
            categories: {
              type: 'array',
              items: {
                type: 'object',
                properties: {
                  relevance: {
                    type: 'string',
                    enum: ['irrelevant', 'normal', 'unusual', 'warning', 'critical'],
                  },
                  shortIds: {
                    type: 'array',
                    description:
                      'The pattern IDs you want to group here. Use the pattern short ID.',
                    items: {
                      type: 'string',
                    },
                  },
                },
                required: ['relevance', 'shortIds'],
              },
            },
          },
          required: ['categories'],
        } as const,
      })
      .then((outputEvent) => {
        return outputEvent.output.categories.flatMap((category) => {
          return mapIdsBackToPatterns(category.shortIds).flatMap((pattern) => {
            if (category.relevance === 'irrelevant') {
              return [];
            }
            return [
              {
                ...pattern,
                relevance: category.relevance,
              },
            ];
          });
        });
      });
  }

  function preparePatternsForLlm(patterns: FieldPatternResultWithChanges[]): string {
    const groupedByField = groupBy(patterns, (pattern) => pattern.field);

    return Object.entries(groupedByField)
      .map(([field, patternsForField]) => {
        return `### \`${field}\`

#### Patterns

${JSON.stringify(
  patternsForField.map((pattern) => {
    return {
      shortId: patternIdLookupTable.take(pattern.regex),
      regex: pattern.regex,
      sample: pattern.sample,
      highlight: pattern.highlight,
      change: pattern.change,
    };
  })
)}
`;
      })
      .join('\n\n');
  }

  function mapIdsBackToPatterns(ids?: string[]) {
    return compact(
      ids?.map((shortId) => {
        const lookupId = patternIdLookupTable.lookup(shortId);
        if (!lookupId) {
          return undefined;
        }
        const pattern = patternsByRegex.get(lookupId);
        return pattern;
      })
    );
  }
}

function isInterestingPattern(
  pattern: Omit<AnalyzedLogPattern, 'interesting'>,
  pValueCutOff: number
) {
  return (pattern.change.p_value ?? 1) <= pValueCutOff || pattern.relevance !== 'normal';
}

function getPValueCutoff({ max, cutoff }: { max?: number; cutoff?: LogPatternCutOff }) {
  if (cutoff?.pValue) {
    return cutoff?.pValue;
  }

  if (cutoff?.significance === 'high') {
    return P_VALUE_SIGNIFICANCE_HIGH;
  }

  if (cutoff?.significance === 'medium') {
    return P_VALUE_SIGNIFICANCE_MEDIUM;
  }

  if (max === undefined) {
    return Number.MAX_VALUE;
  }

  if (max <= P_VALUE_SIGNIFICANCE_HIGH) {
    return P_VALUE_SIGNIFICANCE_HIGH;
  }

  if (max <= P_VALUE_SIGNIFICANCE_MEDIUM) {
    return P_VALUE_SIGNIFICANCE_MEDIUM;
  }

  return Number.MAX_VALUE;
}
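A note on the adaptive cutoff above: the `max` argument is the smallest (most significant) p-value across all classified patterns, and the cutoff snaps to the tightest threshold that value clears. A self-contained restatement, with invented threshold values (the real constants come from `p_value_to_label`):

```ts
// Assumed thresholds for the example only.
const HIGH = 1e-6;
const MEDIUM = 1e-3;

function pickCutoff(mostSignificantPValue?: number): number {
  if (mostSignificantPValue === undefined) return Number.MAX_VALUE; // no change points at all
  if (mostSignificantPValue <= HIGH) return HIGH; // only highly significant changes pass
  if (mostSignificantPValue <= MEDIUM) return MEDIUM;
  return Number.MAX_VALUE; // nothing significant found: every pattern passes the p-value test
}

// pickCutoff(1e-7) === HIGH; pickCutoff(0.5) === Number.MAX_VALUE
```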
@@ -0,0 +1,74 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { InferenceClient } from '@kbn/inference-plugin/server';
import { TruncatedDocumentAnalysis } from '@kbn/observability-utils-common/llm/log_analysis/document_analysis';
import { FieldPatternResultWithChanges } from '@kbn/observability-utils-server/entities/get_log_patterns';
import { RCA_SYSTEM_PROMPT_BASE } from '../../prompts';
import { formatEntity } from '../../util/format_entity';
import { serializeKnowledgeBaseEntries } from '../../util/serialize_knowledge_base_entries';
import { ScoredKnowledgeBaseEntry } from '../get_knowledge_base_entries';
import { getInvestigateEntityTaskPrompt } from '../investigate_entity/prompts';

export async function describeEntity({
  inferenceClient,
  connectorId,
  entity,
  contextForEntityInvestigation,
  analysis,
  ownPatterns,
  kbEntries,
}: {
  inferenceClient: InferenceClient;
  connectorId: string;
  entity: Record<string, string>;
  analysis: TruncatedDocumentAnalysis;
  contextForEntityInvestigation: string;
  ownPatterns: FieldPatternResultWithChanges[];
  kbEntries: ScoredKnowledgeBaseEntry[];
}) {
  const system = RCA_SYSTEM_PROMPT_BASE;

  const input = `${getInvestigateEntityTaskPrompt({ entity, contextForEntityInvestigation })}

## Context for investigating ${formatEntity(entity)}

${contextForEntityInvestigation}

${serializeKnowledgeBaseEntries(kbEntries)}

## Data samples

${JSON.stringify(analysis)}

## Log patterns

${JSON.stringify(ownPatterns.map(({ regex, sample }) => ({ regex, sample })))}

## Current task

Describe the entity characteristics based on the sample documents and log
patterns. Put it in context of the investigation process. Mention the reason
why it's being investigated, and how it is related to other entities that were
previously investigated. Mention these three things:

- infrastructure & environment
- communication characteristics (protocols and endpoints)
- context of entity in investigation

You shouldn't mention the log patterns, they will be analyzed elsewhere.
`;

  const response = await inferenceClient.output({
    id: 'describe_entity',
    connectorId,
    system,
    input,
  });

  return response.content;
}
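A hypothetical call site, assuming `describeEntity` and the imports above are in scope; every argument value here is a placeholder:

```ts
// Sketch only: the real deps come from the RCA context.
async function describeExampleEntity(deps: {
  inferenceClient: InferenceClient;
  connectorId: string;
  analysis: TruncatedDocumentAnalysis;
}) {
  return describeEntity({
    ...deps,
    entity: { 'service.name': 'checkout' }, // assumed example entity
    contextForEntityInvestigation: 'Elevated failure rate on /api/checkout', // assumed context
    ownPatterns: [], // log patterns from analyzeLogPatterns, if any
    kbEntries: [], // scored knowledge base entries, if any
  });
}
```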
@@ -0,0 +1,189 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { InferenceClient } from '@kbn/inference-plugin/server';
import { TruncatedDocumentAnalysis } from '@kbn/observability-utils-common/llm/log_analysis/document_analysis';
import { omit, partition, sumBy } from 'lodash';
import { RCA_SYSTEM_PROMPT_BASE } from '../../prompts';
import { formatEntity } from '../../util/format_entity';
import { serializeKnowledgeBaseEntries } from '../../util/serialize_knowledge_base_entries';
import { AnalyzedLogPattern } from '../analyze_log_patterns';
import { ScoredKnowledgeBaseEntry } from '../get_knowledge_base_entries';
import { getInvestigateEntityTaskPrompt } from '../investigate_entity/prompts';

export interface LogPatternDescription {
  content: string;
  docCount: number;
  interestingPatternCount: number;
  ignoredPatternCount: number;
  ignoredDocCount: number;
}

export async function describeLogPatterns({
  inferenceClient,
  connectorId,
  entity,
  contextForEntityInvestigation,
  analysis,
  ownPatterns: allOwnPatterns,
  patternsFromOtherEntities,
  kbEntries,
}: {
  inferenceClient: InferenceClient;
  connectorId: string;
  entity: Record<string, string>;
  analysis: TruncatedDocumentAnalysis;
  contextForEntityInvestigation: string;
  ownPatterns: AnalyzedLogPattern[];
  patternsFromOtherEntities: AnalyzedLogPattern[];
  kbEntries: ScoredKnowledgeBaseEntry[];
}): Promise<LogPatternDescription> {
  const system = RCA_SYSTEM_PROMPT_BASE;

  const [ownInterestingPatterns, ignoredOwnPatterns] = partition(
    allOwnPatterns,
    (pattern) => pattern.interesting
  );

  const stats = {
    docCount: sumBy(allOwnPatterns, (pattern) => pattern.count),
    interestingPatternCount: ownInterestingPatterns.length,
    otherInterestingPatternCount: patternsFromOtherEntities.length,
    ignoredPatternCount: ignoredOwnPatterns.length,
    ignoredDocCount: sumBy(ignoredOwnPatterns, (pattern) => pattern.count),
  };

  const header = `## Log analysis

### Stats for own log patterns:
- ${stats.docCount} documents analyzed
- ${stats.interestingPatternCount} interesting patterns
- ${stats.ignoredPatternCount} ignored patterns, accounting for
${stats.ignoredDocCount} out of ${stats.docCount} documents
- ${stats.otherInterestingPatternCount} relevant patterns from
other entities`;

  if (!stats.interestingPatternCount && !stats.otherInterestingPatternCount) {
    return {
      ...stats,
      content: `${header}\n\nNo interesting log patterns`,
    };
  }

  const ownLogPatternsPrompt = ownInterestingPatterns.length
    ? JSON.stringify(
        ownInterestingPatterns.map(({ regex, sample, change, count, timeseries }) => ({
          regex,
          sample,
          change,
          count,
          timeseries: timeseries.map(({ x, y }, index) => {
            if (index === change.change_point) {
              return `${change.type} at ${new Date(x).toISOString()}: ${y}`;
            }
            return `${new Date(x).toISOString()}: ${y}`;
          }),
        }))
      )
    : 'No own log patterns found';

  const otherLogPatternsPrompt = patternsFromOtherEntities.length
    ? JSON.stringify(
        patternsFromOtherEntities.map(
          ({ regex, sample, change, count, timeseries, metadata, field, highlight }) => ({
            regex,
            sample,
            change,
            count,
            timeseries: timeseries.map(({ x, y }, index) => {
              if (index === change.change_point) {
                return `${change.type} at ${new Date(x).toISOString()}: ${y}`;
              }
              return `${new Date(x).toISOString()}: ${y}`;
            }),
            entity: omit(metadata, field),
            highlight,
          })
        )
      )
    : 'No relevant log patterns found from other entities';

  const input = `${getInvestigateEntityTaskPrompt({ entity, contextForEntityInvestigation })}

## Context for investigating ${formatEntity(entity)}

${contextForEntityInvestigation}

${serializeKnowledgeBaseEntries(kbEntries)}

## Data samples

${JSON.stringify(analysis)}

## Log patterns from ${formatEntity(entity)}

${ownLogPatternsPrompt}

## Possibly relevant log patterns from other entities

${otherLogPatternsPrompt}

### Interpreting log patterns and samples

The pattern itself is what is consistent across all messages. The values from these parts
are separately given in "constants". There's also a single (random) _sample_ included, with
the variable part being given as well. E.g., if the failure in the sample is not part of the pattern
itself, you should mention that in your analysis.

## Task

Using only the log patterns, describe your observations about the entity.

Group these patterns together based on topic. Some examples of these topics:

- normal operations such as request logs
- connection issues to an upstream dependency
- startup messages
- garbage collection messages

For patterns with change points, describe the trend before and after the change point based
on the data points. E.g.:
- A persisted drop to near-zero after 2020-01-01T05:00:00.000Z
- A spike from 10 to 100 at 2020-01-01T05:00:00.000Z, which went back down
to the average after 2020-01-01T05:02:00.000Z
- A trend change after 2020-01-01T05:00:00.000Z. The values ranged from 10
to 20 before, but then increased from 20 to 100 until
2020-01-01T05:02:00.000Z.

Do not:
- repeat the variables, instead, repeat the constants.
- repeat the timeseries as a whole, verbatim, in full. However, you can use individual data points + timestamps to illustrate the magnitude of the change, as in the example previously given.
- make up timestamps.
- separately list individual events if you have already mentioned
the pattern.

Statistics:

- ${stats.interestingPatternCount} patterns from ${formatEntity(entity)}
were collected
- ${stats.docCount} logs were categorized
- ${stats.ignoredPatternCount} patterns were deemed uninteresting and accounted
for ${stats.ignoredDocCount} out of the total amount of logs
`;

  const response = await inferenceClient.output({
    id: 'describe_log_patterns',
    connectorId,
    system,
    input,
  });

  return {
    ...stats,
    content: response.content,
  };
}
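A minimal sketch of the change-point annotation used in both prompt builders above: the data point at `change_point` is rendered with its change type, all others as plain timestamp/value pairs. The data values are invented for illustration:

```ts
const change = { type: 'spike', change_point: 1 };
const timeseries = [
  { x: 1735689600000, y: 10 }, // 2025-01-01T00:00:00.000Z
  { x: 1735689660000, y: 100 },
];

const rendered = timeseries.map(({ x, y }, index) =>
  index === change.change_point
    ? `${change.type} at ${new Date(x).toISOString()}: ${y}`
    : `${new Date(x).toISOString()}: ${y}`
);
// rendered[1] === 'spike at 2025-01-01T00:01:00.000Z: 100'
```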
@@ -0,0 +1,438 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { InferenceClient } from '@kbn/inference-plugin/server';
import { Logger } from '@kbn/logging';
import { getEntityKuery } from '@kbn/observability-utils-common/entities/get_entity_kuery';
import {
  DocumentAnalysis,
  TruncatedDocumentAnalysis,
} from '@kbn/observability-utils-common/llm/log_analysis/document_analysis';
import { sortAndTruncateAnalyzedFields } from '@kbn/observability-utils-common/llm/log_analysis/sort_and_truncate_analyzed_fields';
import { analyzeDocuments } from '@kbn/observability-utils-server/entities/analyze_documents';
import { FieldPatternResultWithChanges } from '@kbn/observability-utils-server/entities/get_log_patterns';
import { ObservabilityElasticsearchClient } from '@kbn/observability-utils-server/es/client/create_observability_es_client';
import { kqlQuery } from '@kbn/observability-utils-server/es/queries/kql_query';
import { rangeQuery } from '@kbn/observability-utils-server/es/queries/range_query';
import { chunk, isEmpty, isEqual } from 'lodash';
import pLimit from 'p-limit';
import {
  RCA_PROMPT_DEPENDENCIES,
  RCA_PROMPT_ENTITIES,
  RCA_SYSTEM_PROMPT_BASE,
} from '../../prompts';
import { chunkOutputCalls } from '../../util/chunk_output_calls';
import { formatEntity } from '../../util/format_entity';
import { serializeKnowledgeBaseEntries } from '../../util/serialize_knowledge_base_entries';
import { toBlockquote } from '../../util/to_blockquote';
import { ScoredKnowledgeBaseEntry } from '../get_knowledge_base_entries';
import { RelatedEntityKeywordSearch } from './write_keyword_searches_for_related_entities';

export interface RelatedEntityFromSearchResults {
  entity: { [x: string]: string };
  highlight: Record<string, string[]>;
  analysis: TruncatedDocumentAnalysis;
}

function getPromptForFoundEntity({ entity, analysis, highlight }: RelatedEntityFromSearchResults) {
  return `## Entity: ${formatEntity(entity)}

${toBlockquote(`### Search highlights for ${formatEntity(entity)}
${JSON.stringify(highlight)}`)}
`;
}

function getInputPromptBase({
  entity,
  analysis,
  ownPatterns,
  patternsFromOtherEntities,
  searches,
  context,
  kbEntries,
}: {
  entity: Record<string, string>;
  analysis: TruncatedDocumentAnalysis;
  ownPatterns: FieldPatternResultWithChanges[];
  patternsFromOtherEntities: FieldPatternResultWithChanges[];
  searches: RelatedEntityKeywordSearch[];
  context: string;
  kbEntries: ScoredKnowledgeBaseEntry[];
}) {
  const otherPatternsPrompt = patternsFromOtherEntities.length
    ? JSON.stringify(
        patternsFromOtherEntities.map((pattern) => ({
          sample: pattern.sample,
          regex: pattern.regex,
        }))
      )
    : 'No relevant log patterns from other entities found';
  const logPatternsPrompt = ownPatterns.length
    ? JSON.stringify(
        ownPatterns.map((pattern) => {
          return { sample: pattern.sample, regex: pattern.regex };
        })
      )
    : 'No log patterns found';
  return `Describe possible relationships to the investigated entity ${formatEntity(entity)}.

## Context

${toBlockquote(context)}

${serializeKnowledgeBaseEntries(kbEntries)}

## Data analysis
${JSON.stringify(analysis)}

## Log patterns for ${formatEntity(entity)}

${logPatternsPrompt}

## Patterns from other entities

${otherPatternsPrompt}

## Search keywords

${searches
  .map(({ fragments, appearsAs }) => {
    return `## Appears as: ${appearsAs}

### Fragments:
${fragments.map((fragment) => `- \`${fragment}\``).join('\n')}`;
  })
  .join('\n')}`;
}

function getInputPromptInstructions({ entity }: { entity: Record<string, any> }) {
  return `### Indicator strength

In an Observability system, indicators of relationships between entities like
services, hosts, users, or requests can vary in strength. Some indicators
clearly define relationships, while others only suggest correlations. Here’s a
breakdown of these indicators into strong, average, and weak categories, with an
additional look at how weak indicators can become strong when combined.

Strong indicators provide definitive links between entities. Distributed tracing
IDs (trace, span, and parent) are among the strongest indicators, as they map
the complete request path across services, showing exact service interactions.
Session or user IDs are also strong indicators, capturing a user’s actions
across services or hosts and revealing issues specific to particular users.

Average indicators give helpful context but may require supporting data to
clarify relationships. IP addresses, for instance, are moderately strong for
tracking inter-service calls within controlled environments but are weaker
across public or shared networks where IP reuse is common. URL paths also fall
in this category; they link entities to specific endpoints or service functions
and are moderately strong for tracking interactions between microservices with
known APIs. Port numbers are another average indicator. While they suggest the
service interaction type (HTTP, database), they generally need pairing with IP
addresses or URLs for more accuracy, as port numbers alone are often shared
across different services.

Weak indicators are often too generic to imply a direct relationship but can
suggest possible correlations. Host names, for example, are broad and typically
cover a range of services or applications, especially in large clusters.
Time-based indicators, such as timestamps or TTL values, suggest possible timing
correlations but don’t establish a definitive link on their own. Status codes,
like HTTP 500 errors, indicate issues but don’t specify causality, often
requiring corroboration with stronger indicators like trace or session IDs.

However, weak indicators can become strong when they appear together. For
instance, a combination of IP address, port, and timestamp can strongly suggest
a direct interaction between services, especially when the same combination is
seen repeatedly or in conjunction with related URLs. Similarly, a host name
combined with a unique URL path can strongly suggest that a specific service or
pod is generating particular request patterns, even if each alone is too
general.

## Relevance to the investigation

Given the context of the investigation, some entities might be very relevant
even if there is no strong evidence of them being a direct dependency of
${formatEntity(entity)}. For instance, the related entity might be an
orchestrating entity, or it might be involved in a specific operation related
to the ongoing issue.

## Identifying entity relationships

Your current task is to identify possible entity relationships for the
investigated entity ${formatEntity(entity)}. You will get some context, document
analysis for the investigated entity, and results from keyword searches that were
extracted from the entity. Based on this data, list entities that could possibly
be related to the given entity and/or the initial context. List the highly
relevant entities first.

## Output

For each possible relationship, describe the following things:
- The related entity (as a key-value pair)
- The indicators you have observed as evidence of the relationship. Include the
strength of the indicator, and the exact pieces of data that are related to it
(field names and values, in both the investigated entity, and the possibly
related entity).
- Reason how the related entity is related to both ${formatEntity(entity)} as a
dependency and the context. For instance, describe who is the caller and callee
or whether that is unclear, based on the data, or explain how it might be
related to the context.
- The overall likeliness of it being a relevant entity.`;
}

export async function analyzeFetchedRelatedEntities({
  connectorId,
  inferenceClient,
  esClient,
  start,
  end,
  searches,
  groupingFields,
  index,
  entity,
  ownPatterns,
  analysis,
  patternsFromOtherEntities,
  logger: parentLogger,
  context,
  kbEntries,
}: {
  connectorId: string;
  inferenceClient: InferenceClient;
  esClient: ObservabilityElasticsearchClient;
  start: number;
  end: number;
  searches: RelatedEntityKeywordSearch[];
  groupingFields: string[];
  index: string | string[];
  entity: Record<string, string>;
  analysis: {
    truncated: TruncatedDocumentAnalysis;
    full: DocumentAnalysis;
  };
  ownPatterns: FieldPatternResultWithChanges[];
  patternsFromOtherEntities: FieldPatternResultWithChanges[];
  context: string;
  logger: Logger;
  kbEntries: ScoredKnowledgeBaseEntry[];
}): Promise<{
  summaries: string[];
  foundEntities: RelatedEntityFromSearchResults[];
}> {
  const entityFields = Object.keys(entity);

  const logger = parentLogger.get('findRelatedEntities');

  logger.debug(
    () => `Finding related entities: ${JSON.stringify({ entity, groupingFields, searches })}`
  );

  const allValuesFromEntity = Array.from(
    new Set(analysis.full.fields.flatMap((field) => field.values))
  );

  const foundEntities = (
    await Promise.all(
      groupingFields.map((groupingField) => getResultsForGroupingField(groupingField))
    )
  ).flat();

  logger.debug(() => `Found ${foundEntities.length} entities via keyword searches`);

  const system = `${RCA_SYSTEM_PROMPT_BASE}

${RCA_PROMPT_ENTITIES}

${RCA_PROMPT_DEPENDENCIES}`;

  const inputPromptBase = getInputPromptBase({
    entity,
    analysis: analysis.truncated,
    ownPatterns,
    patternsFromOtherEntities,
    searches,
    context,
    kbEntries,
  });

  const foundEntityPrompts = foundEntities.map((foundEntity) => {
    return {
      text: getPromptForFoundEntity(foundEntity),
      id: formatEntity(foundEntity.entity),
    };
  });

  const inputPromptInstructions = getInputPromptInstructions({ entity });

  // don't do more than 10 entities in a response, we'll run out of
  // tokens
  const requests = chunk(foundEntityPrompts, 10).flatMap((texts) =>
    chunkOutputCalls({
      system,
      input: `${inputPromptBase} ${inputPromptInstructions}`,
      texts,
      tokenLimit: 32_000 - 6_000,
    })
  );

  const allRelevantEntityDescriptions = await Promise.all(
    requests.map(async (request) => {
      const outputCompleteEvent = await inferenceClient.output({
        id: 'describe_relevant_entities',
        connectorId,
        system: request.system,
        input: `${inputPromptBase}

# Found entities

${request.texts.map((text) => text.text).join('\n\n')}

${inputPromptInstructions}`,
      });

      return outputCompleteEvent.content;
    })
  );

  return {
    summaries: allRelevantEntityDescriptions,
    foundEntities,
  };

  async function getResultsForGroupingField(
    groupingField: string
  ): Promise<RelatedEntityFromSearchResults[]> {
    const excludeQuery = isEqual([groupingField], entityFields)
      ? `NOT (${groupingField}:"${entity[groupingField]}")`
      : ``;

    const fieldCaps = await esClient.fieldCaps('check_if_grouping_field_exists', {
      fields: [groupingField],
      index,
      index_filter: {
        bool: {
          filter: [...rangeQuery(start, end)],
        },
      },
    });

    if (isEmpty(fieldCaps.fields[groupingField])) {
      return [];
    }

    const keywordSearchResults = await esClient.search(
      'find_related_entities_via_keyword_searches',
      {
        track_total_hits: false,
        index,
        query: {
          bool: {
            must: [...rangeQuery(start, end), ...kqlQuery(excludeQuery)],
            should: [
              {
                multi_match: {
                  query: searches.flatMap((search) => search.fragments).join(' '),
                  fields: '*',
                },
              },
            ],
            minimum_should_match: 1,
          },
        },
        fields: [groupingField],
        collapse: {
          field: groupingField,
        },
        highlight: {
          fields: {
            '*': {},
          },
        },
        _source: false,
        size: 1_000,
      }
    );

    if (!keywordSearchResults.hits.hits.length) {
      logger.debug(() => `No hits: ${JSON.stringify({ entity, groupingField, searches })}`);
      return [];
    }

    logger.trace(
      () =>
        `Hits: ${JSON.stringify({
          entity,
          groupingField,
          searches,
          count: keywordSearchResults.hits.hits.length,
          hits: keywordSearchResults.hits.hits,
        })}`
    );

    const limiter = pLimit(20);

    const groupingFieldAnalysis = await Promise.all(
      keywordSearchResults.hits.hits.map(async (hit) => {
        return limiter(async () => {
          const groupValue = hit.fields![groupingField][0] as string;

          const analysisForGroupingField = await analyzeDocuments({
            esClient,
            start,
            end,
            index,
            kuery: getEntityKuery({
              [groupingField]: groupValue,
            }),
          });

          const analysisWithRelevantValues = {
            ...analysisForGroupingField,
            fields: analysisForGroupingField.fields
              .filter((field) => {
                return !field.empty;
              })
              .map((field) => {
                const valuesFoundInEntity = field.values.filter((value) => {
                  return (
                    allValuesFromEntity.includes(value) ||
                    allValuesFromEntity.some((valueFromEntity) => {
                      return (
                        typeof valueFromEntity === 'string' &&
                        typeof value === 'string' &&
                        (value.includes(valueFromEntity) || valueFromEntity.includes(value))
                      );
                    })
                  );
                });
                return {
                  ...field,
                  values: valuesFoundInEntity,
                };
              }),
          };

          return {
            groupingField,
            key: groupValue,
            highlight: hit.highlight!,
            analysis: sortAndTruncateAnalyzedFields(analysisWithRelevantValues),
          };
        });
      })
    );

    return groupingFieldAnalysis.map(({ key, highlight, analysis: analysisForGroupingField }) => {
      return {
        entity: {
          [groupingField]: key,
        },
        highlight,
        analysis: analysisForGroupingField,
      };
    });
  }
}
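A sketch of the concurrency pattern used above: `analyzeDocuments` is called once per collapsed hit, but at most 20 calls run at a time. The workload below is a stand-in for the real per-hit analysis:

```ts
import pLimit from 'p-limit';

async function analyzeAllHits(hits: string[]) {
  const limiter = pLimit(20); // cap in-flight analyses, mirroring the code above
  return Promise.all(
    hits.map((hit) =>
      limiter(async () => {
        // placeholder for the per-hit analyzeDocuments call
        return `analyzed:${hit}`;
      })
    )
  );
}
```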
@@ -0,0 +1,159 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import stringify from 'json-stable-stringify';
import pLimit from 'p-limit';
import { RelatedEntityFromSearchResults } from '.';
import {
  RCA_PROMPT_DEPENDENCIES,
  RCA_PROMPT_ENTITIES,
  RCA_SYSTEM_PROMPT_BASE,
} from '../../prompts';
import { RootCauseAnalysisContext } from '../../types';
import { formatEntity } from '../../util/format_entity';
import { getPreviouslyInvestigatedEntities } from '../../util/get_previously_investigated_entities';
import { toBlockquote } from '../../util/to_blockquote';

export interface RelatedEntityDescription {
  entity: Record<string, string>;
  reason: string;
  confidence: string;
}

export async function extractRelatedEntities({
  entity,
  entityReport,
  summaries,
  foundEntities,
  context,
  rcaContext: { events, connectorId, inferenceClient },
}: {
  foundEntities: RelatedEntityFromSearchResults[];
  entity: Record<string, string>;
  entityReport: string;
  summaries: string[];
  context: string;
  rcaContext: Pick<RootCauseAnalysisContext, 'events' | 'connectorId' | 'inferenceClient'>;
}): Promise<{ relatedEntities: RelatedEntityDescription[] }> {
  const system = `${RCA_SYSTEM_PROMPT_BASE}

${RCA_PROMPT_ENTITIES}

${RCA_PROMPT_DEPENDENCIES}`;

  const previouslyInvestigatedEntities = getPreviouslyInvestigatedEntities({ events });

  const previouslyInvestigatedEntitiesPrompt = previouslyInvestigatedEntities.length
    ? `## Previously investigated entities

${previouslyInvestigatedEntities
  .map((prevEntity) => `- ${formatEntity(prevEntity)}`)
  .join('\n')}`
    : '';

  const prompts = summaries.map((summary) => {
    return `
# Investigated entity

${formatEntity(entity)}

# Report

${toBlockquote(entityReport)}

# Related entities report

${toBlockquote(summary)}

${previouslyInvestigatedEntitiesPrompt}

# Context

${context}

# Task

Your current task is to extract relevant entities as a data structure from the
related entities report. Order them by relevance to the investigation, put the
most relevant ones first.
`;
  });

  const limiter = pLimit(5);

  const allEvents = await Promise.all(
    prompts.map(async (input) => {
      const completeEvent = await limiter(() =>
        inferenceClient.output({
          id: 'get_entity_relationships',
          connectorId,
          system,
          input,
          schema: {
            type: 'object',
            properties: {
              related_entities: {
                type: 'array',
                items: {
                  type: 'object',
                  properties: {
                    entity: {
                      type: 'object',
                      properties: {
                        field: {
                          type: 'string',
                        },
                        value: {
                          type: 'string',
                        },
                      },
                      required: ['field', 'value'],
                    },
                    reason: {
                      type: 'string',
                      description: 'Describe why this entity might be relevant. Provide evidence.',
                    },
                    confidence: {
                      type: 'string',
                      description:
                        'Describe how confident you are in your conclusion about this relationship: low, moderate, high',
                    },
                  },
                  required: ['entity', 'reason', 'confidence'],
                },
              },
            },
            required: ['related_entities'],
          } as const,
        })
      );
      return completeEvent.output;
    })
  );

  const foundEntityIds = foundEntities.map(({ entity: foundEntity }) => stringify(foundEntity));

  const relatedEntities = allEvents
    .flat()
    .flatMap((event) => {
      return event.related_entities.map((item) => {
        return {
          entity: { [item.entity.field]: item.entity.value },
          reason: item.reason,
          confidence: item.confidence,
        };
      });
    })
    .filter((item) => {
      return foundEntityIds.includes(stringify(item.entity));
    });

  return {
    relatedEntities,
  };
}
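A note on why `json-stable-stringify` is used rather than `JSON.stringify`: key order must not affect identity when matching LLM-extracted entities back to the entities found via search. The entities here are single key-value pairs, but the stable ordering keeps the comparison robust if that ever changes. Field names below are illustrative:

```ts
import stringify from 'json-stable-stringify';

const fromSearch = stringify({ 'service.name': 'checkout', 'service.environment': 'prod' });
const fromLlm = stringify({ 'service.environment': 'prod', 'service.name': 'checkout' });
// Keys are sorted before serializing, so both produce the same string:
console.log(fromSearch === fromLlm); // true
```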
@@ -0,0 +1,97 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { InferenceClient } from '@kbn/inference-plugin/server';
import { Logger } from '@kbn/logging';
import {
  DocumentAnalysis,
  TruncatedDocumentAnalysis,
} from '@kbn/observability-utils-common/llm/log_analysis/document_analysis';
import { ObservabilityElasticsearchClient } from '@kbn/observability-utils-server/es/client/create_observability_es_client';
import { FieldPatternResultWithChanges } from '@kbn/observability-utils-server/entities/get_log_patterns';
import {
  analyzeFetchedRelatedEntities,
  RelatedEntityFromSearchResults,
} from './analyze_fetched_related_entities';
import {
  RelatedEntityKeywordSearch,
  writeKeywordSearchForRelatedEntities,
} from './write_keyword_searches_for_related_entities';
import { ScoredKnowledgeBaseEntry } from '../get_knowledge_base_entries';

export type { RelatedEntityFromSearchResults };

export async function findRelatedEntities({
  connectorId,
  inferenceClient,
  start,
  end,
  index,
  esClient,
  entity,
  analysis,
  logger,
  context,
  ownPatterns,
  patternsFromOtherEntities,
  kbEntries,
}: {
  connectorId: string;
  inferenceClient: InferenceClient;
  start: number;
  end: number;
  index: string | string[];
  esClient: ObservabilityElasticsearchClient;
  entity: Record<string, string>;
  analysis: {
    truncated: TruncatedDocumentAnalysis;
    full: DocumentAnalysis;
  };
  logger: Logger;
  context: string;
  ownPatterns: FieldPatternResultWithChanges[];
  patternsFromOtherEntities: FieldPatternResultWithChanges[];
  kbEntries: ScoredKnowledgeBaseEntry[];
}): Promise<{
  searches: RelatedEntityKeywordSearch[];
  summaries: string[];
  foundEntities: RelatedEntityFromSearchResults[];
}> {
  const { groupingFields, searches } = await writeKeywordSearchForRelatedEntities({
    connectorId,
    inferenceClient,
    entity,
    analysis: analysis.truncated,
    ownPatterns,
    context,
    kbEntries,
  });

  const { summaries, foundEntities } = await analyzeFetchedRelatedEntities({
    entity,
    connectorId,
    start,
    end,
    esClient,
    index,
    inferenceClient,
    searches,
    groupingFields,
    logger,
    analysis,
    ownPatterns,
    patternsFromOtherEntities,
    context,
    kbEntries,
  });

  return {
    searches,
    summaries,
    foundEntities,
  };
}
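A hypothetical invocation of the two-step flow above, assuming the imports from this file are in scope; every argument value is a placeholder:

```ts
async function findRelatedForCheckout(deps: {
  connectorId: string;
  inferenceClient: InferenceClient;
  esClient: ObservabilityElasticsearchClient;
  logger: Logger;
  analysis: { truncated: TruncatedDocumentAnalysis; full: DocumentAnalysis };
}) {
  return findRelatedEntities({
    ...deps,
    start: Date.now() - 15 * 60 * 1000, // assumed 15-minute window
    end: Date.now(),
    index: ['logs-*'], // assumed index pattern
    entity: { 'service.name': 'checkout' },
    context: 'Elevated failure rate on /api/checkout',
    ownPatterns: [],
    patternsFromOtherEntities: [],
    kbEntries: [],
  });
}
```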
@ -0,0 +1,199 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import { InferenceClient } from '@kbn/inference-plugin/server';
|
||||
import { TruncatedDocumentAnalysis } from '@kbn/observability-utils-common/llm/log_analysis/document_analysis';
|
||||
import { FieldPatternResultWithChanges } from '@kbn/observability-utils-server/entities/get_log_patterns';
|
||||
import { RCA_PROMPT_ENTITIES, RCA_SYSTEM_PROMPT_BASE } from '../../prompts';
|
||||
import { formatEntity } from '../../util/format_entity';
|
||||
import { serializeKnowledgeBaseEntries } from '../../util/serialize_knowledge_base_entries';
|
||||
import { toBlockquote } from '../../util/to_blockquote';
|
||||
import { ScoredKnowledgeBaseEntry } from '../get_knowledge_base_entries';
|
||||
|
||||
const SYSTEM_PROMPT_ADDENDUM = `# Guide: Constructing Keyword Searches to Find Related Entities
|
||||
|
||||
When investigating issues like elevated failure rates for a
|
||||
specific endpoint, you can use the metadata at hand (IP addresses,
|
||||
URLs, session IDs, tracing IDs, etc.) to build targeted keyword searches.
|
||||
By extracting meaningful fragments from the data, you can correlate
|
||||
related services or hosts across distributed systems. Here’s how
|
||||
you can break down the metadata and format your searches.
|
||||
|
||||
## Grouping fields
|
||||
|
||||
Define grouping fields for the entities you want to extract. For
|
||||
instance, "service.name" if you are looking for services, or
|
||||
"kubernetes.pod.name" if you are looking for pods. Focus
|
||||
on services, unless you are looking for deployment or
|
||||
configuration changes.
|
||||
|
||||
---
|
||||
|
||||
## Key Metadata and Search Format
|
||||
|
||||
### Example: Investigating a service failure for \`/api/products\`
|
||||
|
||||
You can break down various pieces of metadata into searchable
|
||||
fragments. For each value, include a short description of its
|
||||
relationship to the investigation. This value will be used
|
||||
by the system to determine the relevance of a given entity
|
||||
that matches the search request.
|
||||
|
||||
### 1. **IP Address and Port**
|
||||
- **Fragments:**
|
||||
- \`"10.44.0.11:8080"\`: Full address.
|
||||
- \`"10.44.0.11"\`: IP address only.
|
||||
- \`"8080"\`: Port number.
|
||||
- **Appears as:** This IP address and port are referenced as
|
||||
<ip-field-name> and <port-field-name> in the investigated entity
|
||||
<entity-name>..
|
||||
|
||||
### 2. **Outgoing Request URL**
|
||||
- **Fragments:**
|
||||
- \`"http://called-service/api/product"\`: Full outgoing URL.
|
||||
- \`"/api/product*"\`: Endpoint path.
|
||||
- \`"called-service"\`: Service name of the upstream dependency.
|
||||
- **Appears as:** These URL fragments appear as attributes.request.url
|
||||
in the investigated entity <entity-name>. They could appear as referer
|
||||
in the upstream dependency.
|
||||
|
||||
### 3. **Parent and Span IDs**
|
||||
- **Fragments:**
|
||||
- \`"000aa"\`: Parent ID.
|
||||
- \`"000bbb"\`: Span ID.
|
||||
- **Relationship:** These ids appear as span.id and parent.id in the
|
||||
investigated entity <entity-name>. They could be referring to spans
|
||||
found on upstream or downstream services.
|
||||
|
||||
---
|
||||
|
||||
## Example Search Format in JSON
|
||||
|
||||
To structure your keyword search, format the fragments and their
|
||||
relationships in a JSON array like this:
|
||||
|
||||
\`\`\`json
|
||||
{
|
||||
"groupingFields": [ "service.name" ],
|
||||
"values": [
|
||||
{
|
||||
"fragments": [
|
||||
"10.44.0.11:8080",
|
||||
"10.44.0.11",
|
||||
"8080"
|
||||
],
|
||||
"appearsAs": "This IP address and port are referenced as <ip-field-name> and <port-field-name> in the investigated entity <entity-name>."
|
||||
},
|
||||
{
|
||||
"fragments": [
|
||||
"http://<upstream-service>/api/product",
|
||||
"/api/product",
|
||||
"<upstream-service>"
|
||||
],
|
||||
"relationship": "These URL fragments appear as attributes.request.url in the investigated entity <entity-name>."
|
||||
},
|
||||
{
|
||||
"fragments": [
|
||||
"000aa",
|
||||
"000bbb"
|
||||
],
|
||||
"relationship": " These ids appear as span.id and parent.id in the investigated entity <entity-name>. They could be referring to spans found on upstream or downstream services"
|
||||
}
|
||||
]
|
||||
}`;
|
||||
|
||||
export interface RelatedEntityKeywordSearch {
|
||||
fragments: string[];
|
||||
appearsAs: string;
|
||||
}
|
||||
|
||||
export async function writeKeywordSearchForRelatedEntities({
|
||||
connectorId,
|
||||
inferenceClient,
|
||||
entity,
|
||||
analysis,
|
||||
ownPatterns,
|
||||
context,
|
||||
kbEntries,
|
||||
}: {
|
||||
connectorId: string;
|
||||
inferenceClient: InferenceClient;
|
||||
entity: Record<string, string>;
|
||||
analysis: TruncatedDocumentAnalysis;
|
||||
ownPatterns: FieldPatternResultWithChanges[];
|
||||
context: string;
|
||||
kbEntries: ScoredKnowledgeBaseEntry[];
|
||||
}): Promise<{
|
||||
groupingFields: string[];
|
||||
searches: RelatedEntityKeywordSearch[];
|
||||
}> {
|
||||
const logPatternsPrompt = ownPatterns.length
|
||||
? JSON.stringify(
|
||||
ownPatterns.map((pattern) => ({ regex: pattern.regex, sample: pattern.sample }))
|
||||
)
|
||||
: 'No log patterns found';
  return inferenceClient
    .output({
      id: 'extract_keyword_searches',
      connectorId,
      system: `${RCA_SYSTEM_PROMPT_BASE}

${RCA_PROMPT_ENTITIES}`,
      input: `Your current task is to extract keyword searches
to find related entities to the entity ${formatEntity(entity)},
based on the following context:

## Investigation context
${toBlockquote(context)}

${serializeKnowledgeBaseEntries(kbEntries)}

## Data analysis
${JSON.stringify(analysis)}

## Log patterns

${logPatternsPrompt}

## Instructions
${SYSTEM_PROMPT_ADDENDUM}`,
      schema: {
        type: 'object',
        properties: {
          groupingFields: {
            type: 'array',
            items: {
              type: 'string',
            },
          },
          searches: {
            type: 'array',
            items: {
              type: 'object',
              properties: {
                fragments: {
                  type: 'array',
                  items: {
                    type: 'string',
                  },
                },
                appearsAs: {
                  type: 'string',
                  description:
                    'Describe in what fields these values appear as in the investigated entity. You can mention multiple fields if applicable',
                },
              },
              required: ['fragments', 'appearsAs'],
            },
          },
        },
        required: ['searches', 'groupingFields'],
      } as const,
    })
    .then((event) => event.output);
}

@@ -0,0 +1,96 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { RCA_SYSTEM_PROMPT_BASE } from '../../prompts';
import { RootCauseAnalysisContext } from '../../types';
import { stringifySummaries } from '../../util/stringify_summaries';

type SignificantEventSeverity = 'info' | 'unusual' | 'warning' | 'critical';

type SignificantEventType = 'alert' | 'slo' | 'event';

export interface SignificantEvent {
  severity: SignificantEventSeverity;
  '@timestamp'?: string;
  description: string;
  type: SignificantEventType;
}

export interface SignificantEventsTimeline {
  events: SignificantEvent[];
}

export async function generateSignificantEventsTimeline({
  report,
  rcaContext,
}: {
  report: string;
  rcaContext: RootCauseAnalysisContext;
}): Promise<SignificantEventsTimeline> {
  const { connectorId, inferenceClient } = rcaContext;

  return await inferenceClient
    .output({
      id: 'generate_timeline',
      system: RCA_SYSTEM_PROMPT_BASE,
      connectorId,
      input: `Your current task is to generate a timeline
of significant events, based on the given RCA report,
according to a structured schema. This timeline will
be presented to the user as a visualization.

${stringifySummaries(rcaContext)}

# Report

${report}
`,
      schema: {
        type: 'object',
        properties: {
          events: {
            type: 'array',
            items: {
              type: 'object',
              properties: {
                timestamp: {
                  type: 'string',
                  description: 'The ISO timestamp of when the event occurred',
                },
                severity: {
                  type: 'string',
                  enum: ['info', 'unusual', 'warning', 'critical'],
                },
                type: {
                  type: 'string',
                  enum: ['alert', 'slo', 'event'],
                },
                description: {
                  type: 'string',
                  description: 'A description of the event',
                },
              },
              required: ['severity', 'description'],
            },
          },
        },
        required: ['events'],
      } as const,
    })
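    // Normalize the structured output: default a missing event type to 'event'
    // and map the schema's `timestamp` onto the conventional `@timestamp` key.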
    .then((timelineCompleteEvent) => {
      return {
        events: timelineCompleteEvent.output.events.map((event) => {
          return {
            '@timestamp': event.timestamp,
            severity: event.severity,
            type: event.type ?? 'event',
            description: event.description,
          };
        }),
      };
    });
}

@@ -0,0 +1,185 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { ShortIdTable } from '@kbn/observability-ai-assistant-plugin/common';
import { decode, encode } from 'gpt-tokenizer';
import { orderBy, sumBy } from 'lodash';
import { RCA_SYSTEM_PROMPT_BASE } from '../../prompts';
import { RootCauseAnalysisContext } from '../../types';
import { formatEntity } from '../../util/format_entity';
import { toBlockquote } from '../../util/to_blockquote';

export interface ScoredKnowledgeBaseEntry {
  id: string;
  text: string;
  tokens: number;
  score: number;
  truncated?: {
    tokens: number;
    text: string;
  };
}

export async function getKnowledgeBaseEntries({
  entity,
  context,
  rcaContext,
  maxTokens: maxTokensForEntries,
}: {
  entity: Record<string, string>;
  context: string;
  rcaContext: RootCauseAnalysisContext;
  maxTokens: number;
}): Promise<ScoredKnowledgeBaseEntry[]> {
  const response = await rcaContext.observabilityAIAssistantClient.recall({
    queries: [
      ...Object.values(entity).map((value) => ({ text: value, boost: 3 })),
      { text: context },
    ],
    limit: {
      tokenCount: Number.MAX_VALUE,
    },
  });

  const { inferenceClient, connectorId } = rcaContext;

  const shortIdTable = new ShortIdTable();

  const system = RCA_SYSTEM_PROMPT_BASE;

  const input = `Re-order the attached documents, based on relevance to the context.
Score them between 1 and 5, based on their relative relevance to each other. The
most relevant doc should be scored 5, and the least relevant doc should be scored
1.

# Entity

${formatEntity(entity)}

# Context

${toBlockquote(context)}
`;

  const maxTokensForScoring = rcaContext.tokenLimit - encode(system + input).length - 1_000;

  const entriesWithTokens = response.map((entry) => {
    return {
      id: entry.id,
      text: entry.text,
      tokens: encode(entry.text),
    };
  });

  const totalTokenCount = sumBy(entriesWithTokens, (entry) => entry.tokens.length);
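
  // Give each entry a slice of the scoring budget proportional to its size, so
  // every candidate fits into a single scoring call even if some get truncated.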
  const truncatedEntriesWithShortIds = entriesWithTokens.map((entry) => {
    const tokensForEntry = Math.floor(
      (entry.tokens.length / totalTokenCount) * maxTokensForScoring
    );

    const truncatedText = decode(entry.tokens.slice(0, tokensForEntry));
    const isTruncated = tokensForEntry < entry.tokens.length;

    return {
      id: entry.id,
      tokens: entry.tokens,
      shortId: shortIdTable.take(entry.id),
      text: entry.text,
      truncatedText,
      isTruncated,
    };
  });

  const scoredEntries = await inferenceClient.output({
    id: 'score_entries',
    connectorId,
    system: RCA_SYSTEM_PROMPT_BASE,
    input: `${input}

${truncatedEntriesWithShortIds
  .map((entry) => {
    return `# ID: ${entry.shortId}

## Text (${entry.isTruncated ? `truncated` : `not truncated`})

${toBlockquote(entry.truncatedText)}
`;
  })
  .join('\n\n')}
`,
    stream: false,
    schema: {
      type: 'object',
      properties: {
        docs: {
          type: 'array',
          items: {
            type: 'object',
            properties: {
              score: {
                type: 'number',
                description:
                  'A score between 1 and 5, with 5 being most relevant, and 1 being least relevant',
              },
              id: {
                type: 'string',
              },
            },
            required: ['score', 'id'],
          },
        },
      },
      required: ['docs'],
    },
  } as const);

  const scoresById = new Map(scoredEntries.output.docs.map((doc) => [doc.id, doc.score]));

  const entriesWithScore = truncatedEntriesWithShortIds.map((entry) => {
    const score = scoresById.get(entry.shortId) ?? 0;
    return {
      ...entry,
      score,
    };
  });

  const sortedEntries = orderBy(entriesWithScore, (entry) => entry.score, 'desc');
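
  // Spend the caller's token budget on the highest-scored entries first; the
  // first entry that does not fit completely is truncated to the remaining budget.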
  const returnedEntries: ScoredKnowledgeBaseEntry[] = [];

  let tokensLeft = maxTokensForEntries;

  sortedEntries.forEach((entry) => {
    if (entry.tokens.length <= tokensLeft) {
      returnedEntries.push({
        id: entry.id,
        text: entry.text,
        tokens: entry.tokens.length,
        score: entry.score,
      });
      tokensLeft -= entry.tokens.length;
      return;
    }

    const tokensToTake = tokensLeft;
    if (tokensToTake > 0) {
      const tookTokens = entry.tokens.slice(0, tokensToTake);
      tokensLeft -= tookTokens.length;
      returnedEntries.push({
        id: entry.id,
        text: entry.text,
        tokens: entry.tokens.length,
        score: entry.score,
        truncated: {
          text: decode(tookTokens),
          tokens: tookTokens.length,
        },
      });
    }
  });

  return returnedEntries;
}

@@ -0,0 +1,268 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { getEntityKuery } from '@kbn/observability-utils-common/entities/get_entity_kuery';
import { sortAndTruncateAnalyzedFields } from '@kbn/observability-utils-common/llm/log_analysis/sort_and_truncate_analyzed_fields';
import { analyzeDocuments } from '@kbn/observability-utils-server/entities/analyze_documents';
import { getDataStreamsForEntity } from '@kbn/observability-utils-server/entities/get_data_streams_for_entity';
import { getAlertsForEntity } from '@kbn/observability-utils-server/entities/signals/get_alerts_for_entity';
import { getSlosForEntity } from '@kbn/observability-utils-server/entities/signals/get_slos_for_entity';
import { ObservabilityElasticsearchClient } from '@kbn/observability-utils-server/es/client/create_observability_es_client';
import { RootCauseAnalysisContext } from '../../types';
import { stringifySummaries } from '../../util/stringify_summaries';
import { analyzeLogPatterns } from '../analyze_log_patterns';
import { describeEntity } from '../describe_entity';
import { describeLogPatterns } from '../describe_log_patterns';
import { findRelatedEntities } from '../find_related_entities';
import { extractRelatedEntities } from '../find_related_entities/extract_related_entities';
import { writeEntityInvestigationReport } from '../write_entity_investigation_report';
import { EntityInvestigation } from './types';
import { getKnowledgeBaseEntries } from '../get_knowledge_base_entries';

export type { EntityInvestigation };

export interface EntityInvestigationParameters {
  entity: Record<string, string>;
  rcaContext: RootCauseAnalysisContext;
  context: string;
}

export async function investigateEntity(
  parameters: EntityInvestigationParameters
): Promise<EntityInvestigation | undefined> {
  const {
    entity,
    rcaContext,
    rcaContext: {
      inferenceClient,
      connectorId,
      start,
      end,
      esClient,
      logger: parentLogger,
      indices,
    },
    context,
  } = parameters;
  const kuery = getEntityKuery(entity);

  const logger = parentLogger.get('investigateEntity');

  logger.debug(() => `Investigating entity: ${JSON.stringify(parameters.entity)}`);
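
  // Fetch knowledge base entries in parallel with signal retrieval; a failure
  // degrades to an empty list instead of aborting the whole investigation.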
  const kbPromise = getKnowledgeBaseEntries({
    entity,
    context,
    rcaContext,
    maxTokens: 4_000,
  }).catch((error) => {
    logger.error(`Could not fetch entries from knowledge base`);
    logger.error(error);
    return [];
  });

  const [{ dataStreams }, alerts, slos] = await getSignals({ ...parameters, kuery });

  logger.debug(
    () =>
      `Signals for entity ${JSON.stringify(entity)}: ${dataStreams.length} data streams, ${
        alerts.length
      } alerts, ${slos.length} slos`
  );

  if (!dataStreams.length) {
    return undefined;
  }

  const fullAnalysis = await analyzeDataStreamsForEntity({
    start,
    end,
    esClient,
    kuery,
    dataStreams,
  });

  const truncatedAnalysis = sortAndTruncateAnalyzedFields(fullAnalysis);

  const kbEntries = await kbPromise;

  const { ownPatterns, patternsFromOtherEntities } = await analyzeLogPatterns({
    allAnalysis: [{ index: dataStreams, analysis: truncatedAnalysis }],
    entity,
    system: stringifySummaries(rcaContext),
    cutoff: {
      significance: 'high',
    },
    rcaContext,
    kbEntries,
  });

  logger.trace(
    () => `Analyzed log patterns: ${JSON.stringify({ ownPatterns, patternsFromOtherEntities })}`
  );
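
  // The entity report and the related-entities search run concurrently; the
  // related-entity extraction below awaits the finished report before running.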
  const entityReportPromise = Promise.all([
    describeEntity({
      inferenceClient,
      analysis: truncatedAnalysis,
      connectorId,
      contextForEntityInvestigation: context,
      entity,
      ownPatterns,
      kbEntries,
    }),
    describeLogPatterns({
      analysis: truncatedAnalysis,
      connectorId,
      contextForEntityInvestigation: context,
      entity,
      inferenceClient,
      ownPatterns,
      patternsFromOtherEntities,
      kbEntries,
    }),
  ]).then(([entityDescription, logPatternDescription]) => {
    return writeEntityInvestigationReport({
      connectorId,
      inferenceClient,
      entityDescription,
      logPatternDescription,
      contextForEntityInvestigation: context,
      entity,
    }).then((report) => {
      return {
        description: entityDescription,
        logPatternDescription,
        report,
      };
    });
  });

  const [entityReport, relatedEntitiesResults] = await Promise.all([
    entityReportPromise,
    findRelatedEntities({
      connectorId,
      end,
      entity,
      esClient,
      index: indices.logs,
      inferenceClient,
      logger,
      start,
      context,
      analysis: {
        full: fullAnalysis,
        truncated: truncatedAnalysis,
      },
      ownPatterns,
      patternsFromOtherEntities,
      kbEntries,
    }).then(async ({ searches, summaries, foundEntities }) => {
      const report = await entityReportPromise;

      const { relatedEntities } = await extractRelatedEntities({
        entityReport: report.report,
        summaries,
        entity,
        foundEntities,
        context,
        rcaContext,
      });

      return {
        relatedEntities,
        foundEntities,
        searches,
        summaries,
      };
    }),
  ]);

  return {
    entity,
    summary: [
      entityReport.description,
      entityReport.logPatternDescription.content,
      entityReport.report,
    ].join('\n\n'),
    relatedEntities: relatedEntitiesResults.relatedEntities,
    attachments: {
      alerts,
      slos,
      analysis: truncatedAnalysis,
      ownPatterns,
      patternsFromOtherEntities,
      searches: relatedEntitiesResults.searches,
      relatedEntitiesSummaries: relatedEntitiesResults.summaries,
      kbEntries,
    },
  };
}

async function getSignals({
  entity,
  kuery,
  rcaContext: { start, end, esClient, rulesClient, alertsClient, indices, spaceId },
}: {
  kuery: string;
  entity: Record<string, unknown>;
  rcaContext: Pick<
    RootCauseAnalysisContext,
    'start' | 'end' | 'esClient' | 'rulesClient' | 'alertsClient' | 'indices' | 'spaceId'
  >;
}) {
  return await Promise.all([
    getDataStreamsForEntity({
      esClient,
      kuery,
      index: indices.logs.concat(indices.traces),
    }),
    getAlertsForEntity({ entity, rulesClient, alertsClient, start, end, size: 10 }).then(
      (alertsResponse) => {
        return alertsResponse.hits.hits.map((hit) => hit._source!);
      }
    ),
    getSlosForEntity({
      entity,
      start,
      end,
      esClient,
      size: 1000,
      sloSummaryIndices: indices.sloSummaries,
      spaceId,
    }).then((slosResponse) => {
      return slosResponse.hits.hits.map((hit) => hit._source);
    }),
  ]);
}

async function analyzeDataStreamsForEntity({
  start,
  end,
  dataStreams,
  esClient,
  kuery,
}: {
  start: number;
  end: number;
  kuery: string;
  dataStreams: string[];
  esClient: ObservabilityElasticsearchClient;
}) {
  const analysis = await analyzeDocuments({
    esClient,
    start,
    end,
    index: dataStreams,
    kuery,
  });

  return {
    ...analysis,
    fields: analysis.fields.filter((field) => !field.empty),
  };
}

@@ -0,0 +1,22 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { formatEntity } from '../../util/format_entity';
import { toBlockquote } from '../../util/to_blockquote';

export const getInvestigateEntityTaskPrompt = ({
  entity,
  contextForEntityInvestigation,
}: {
  entity: Record<string, string>;
  contextForEntityInvestigation: string;
}) => `## Entity-Based Investigation: Task Guide

In the investigation process, you are currently investigating the entity
${formatEntity(entity)}. The context given for this investigation is:

${toBlockquote(contextForEntityInvestigation)}`;

@@ -0,0 +1,31 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import type { ParsedTechnicalFields } from '@kbn/rule-registry-plugin/common';
import type { TruncatedDocumentAnalysis } from '@kbn/observability-utils-common/llm/log_analysis/document_analysis';
import type { AnalyzeLogPatternOutput } from '../analyze_log_patterns';
import type { RelatedEntityDescription } from '../find_related_entities/extract_related_entities';
import type { RelatedEntityKeywordSearch } from '../find_related_entities/write_keyword_searches_for_related_entities';
import type { ScoredKnowledgeBaseEntry } from '../get_knowledge_base_entries';

export interface EntityInvestigation {
  entity: Record<string, string>;
  summary: string;
  relatedEntities: RelatedEntityDescription[];
  attachments: {
    analysis: TruncatedDocumentAnalysis;
    slos: Array<
      Record<string, any> & {
        status: 'VIOLATED' | 'DEGRADED' | 'HEALTHY' | 'NO_DATA';
      }
    >;
    alerts: ParsedTechnicalFields[];
    searches: RelatedEntityKeywordSearch[];
    relatedEntitiesSummaries: string[];
    kbEntries: ScoredKnowledgeBaseEntry[];
  } & AnalyzeLogPatternOutput;
}

@@ -0,0 +1,239 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { MessageRole } from '@kbn/inference-common';
import { RCA_OBSERVE_TOOL_NAME } from '@kbn/observability-ai-common/root_cause_analysis';
import { RCA_PROMPT_CHANGES, RCA_PROMPT_ENTITIES, RCA_SYSTEM_PROMPT_BASE } from '../../prompts';
import { ObservationToolMessage, RootCauseAnalysisContext } from '../../types';
import { formatEntity } from '../../util/format_entity';
import { getPreviouslyInvestigatedEntities } from '../../util/get_previously_investigated_entities';
import { stringifySummaries } from '../../util/stringify_summaries';
import { toBlockquote } from '../../util/to_blockquote';
import { EntityInvestigation } from '../investigate_entity/types';

const INITIAL_OBSERVATION_TASK_GUIDE = `Your current task is to write observations based on the initial context. You
should acknowledge the context briefly, and mention key observations from the
initial context.

Then, briefly describe what change you are looking for. Are the symptoms:

- rapid, or gradual onset?
- subtle or pronounced?

If possible, mention the time of the change.

When considering the initial context, reason about relevant changes to observe,
such as short-lived versus persistent changes or singular events, like scale
events, rollouts, or configuration changes.

After, taking into account the capabilities you have, plan for next steps.

Describe the next step, which is to investigate the entity found in the initial
context. Only mention the entity (as a field/value). Do not mention any
additional filters.

Be brief, accurate, and critical.`;

const INVESTIGATION_ADDENDUM = `
**Task Guide: Observe the investigation results**

You will receive one or more investigations. These investigations mention:
- a general characterization of the entity based on its data
- relevant log patterns
- other signals, like SLOs or alerts
- possibly related entities, and investigation suggestions

First, you should briefly acknowledge the initial context of the investigation
and where it stands.

Next, you should note key observations from the investigations, and how they relate
to the ongoing investigation.

After, you should generate a timeline of significant events. For this timeline,
include events from previous observations. Additionally, include significant
events from the inspected investigations. Group events together in a topic
if needed. Significant events are things like: an increase in errors, deployment
events, a drop to zero for access logs, etc. In most cases, you do not want to
mention individual log messages, unless it is a particularly significant event
by itself.

For each event, mention:

- the timestamp of the event
- the nature of the change, if applicable
- data from the event, such as specific log patterns, alerts or slos
- the meaning of the event and how it is related to the initial context

Do not include:
- the time range from the investigation itself (start/end)
- other events that occurred during the investigation itself, like running
log analysis or other patterns

## Correlating significant events

When correlating significant events, pay close attention to the timestamp of
the mentioned change, and how it correlates to the timestamp of the change you
want to correlate it to, such as the start time of an alert. An alert might be
delayed, but if you see many changes around a specific timestamp, and some of
them being significantly earlier, or later, the latter group is likely not
relevant.

## Context and reasoning

Next, use the timeline of events and the new observations to revise your
analysis of the initial context and the ongoing investigation. Reason about
how changes could be related: are they close in time, or far removed, compared
to others? Is the type of change similar? Is the magnitude of the change similar?`;

const SUGGEST_NEXT_STEPS_PROMPT = `
Next, consider next steps. It's always important to contextualize the significant
events in the initial context of the investigation. Focus on your strongest pieces of
evidence. Your observations should be related to finding out the cause of the
initial context of the investigation - you should not concern yourself with the
impact on _other_ entities.

Suggest to conclude the process when:

- there is a clear and obvious root cause
- you have investigated more than 10 entities
- OR you cannot find any unhealthy entities
- there are no more entities to investigate

If the conclusion is you need to continue your investigation, mention the entities
that should be investigated. Do this only if there is a significant chance one of
the related entities will give you new insights into the root cause (instead of
just the impact). DO NOT investigate an entity more than once.`;

const CONCLUDE_PROCESS_PROMPT = `
You must suggest to conclude the process and write the final report, as your
capabilities do not allow you to investigate more entities.`;

function getInitialPrompts(initialContext: string) {
  return {
    system: `${RCA_SYSTEM_PROMPT_BASE}

${RCA_PROMPT_ENTITIES}

${RCA_PROMPT_CHANGES}`,
    input: `## Context

${initialContext}

${INITIAL_OBSERVATION_TASK_GUIDE}`,
  };
}

function getObserveInvestigationsPrompts({
  investigations,
  summaries,
  rcaContext,
}: {
  investigations: EntityInvestigation[];
  summaries: ObservationStepSummary[];
  rcaContext: RootCauseAnalysisContext;
}) {
  const previouslyInvestigatedEntities = getPreviouslyInvestigatedEntities(rcaContext);
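
  // Hard caps on the agent loop: once more than five prior observations or more
  // than ten completed investigations have accumulated, the prompt tells the
  // agent to conclude instead of suggesting further steps.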
  const canContinue =
    summaries.length <= 5 &&
    investigations.filter((investigation) => 'summary' in investigation).length <= 10;

  const investigationsPrompt = `Observe the following investigations that recently concluded:
${investigations
  .map((investigation, index) => {
    return `## ${index + 1}: investigation of ${formatEntity(investigation.entity)}

${toBlockquote(investigation.summary)}

${
  investigation.relatedEntities.length
    ? `### Relationships to ${formatEntity(investigation.entity)}

${toBlockquote(JSON.stringify(investigation.relatedEntities))}

`
    : ``
}
`;
  })
  .join('\n\n')}

${INVESTIGATION_ADDENDUM}

${
  canContinue
    ? `${SUGGEST_NEXT_STEPS_PROMPT}

${
  previouslyInvestigatedEntities.length
    ? `The following entities have been investigated previously.
Do not investigate them again:

${previouslyInvestigatedEntities.map((entity) => `- ${JSON.stringify(entity)}`).join('\n')}`
    : ``
}

`
    : CONCLUDE_PROCESS_PROMPT
}

`;

  const systemPrompt = `${RCA_SYSTEM_PROMPT_BASE}

${RCA_PROMPT_ENTITIES}

${stringifySummaries(rcaContext)}`;

  return {
    system: systemPrompt,
    input: investigationsPrompt,
  };
}

export interface ObservationStepSummary {
  investigations: EntityInvestigation[];
  content: string;
}

export function observeInvestigationResults({
  rcaContext,
  rcaContext: { logger, events, initialContext, inferenceClient, connectorId },
  investigations,
}: {
  rcaContext: RootCauseAnalysisContext;
  investigations: EntityInvestigation[];
}): Promise<ObservationStepSummary> {
  const summaries = events
    .filter((event): event is ObservationToolMessage => {
      return event.role === MessageRole.Tool && event.name === RCA_OBSERVE_TOOL_NAME;
    })
    .map((event) => event.data);

  logger.debug(
    () =>
      `Observing ${investigations.length} investigations (${summaries.length} previous summaries)`
  );

  const { system, input } = investigations.length
    ? getObserveInvestigationsPrompts({ summaries, investigations, rcaContext })
    : getInitialPrompts(initialContext);

  return inferenceClient
    .output({
      id: 'observe',
      system,
      input,
      connectorId,
    })
    .then((outputCompleteEvent) => {
      return {
        content: outputCompleteEvent.content,
        investigations,
      };
    });
}

@@ -0,0 +1,84 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { InferenceClient } from '@kbn/inference-plugin/server';
import { RCA_PROMPT_SIGNIFICANT_EVENTS, RCA_SYSTEM_PROMPT_BASE } from '../../prompts';
import { formatEntity } from '../../util/format_entity';
import { toBlockquote } from '../../util/to_blockquote';
import { LogPatternDescription } from '../describe_log_patterns';
import { getInvestigateEntityTaskPrompt } from '../investigate_entity/prompts';

export async function writeEntityInvestigationReport({
  inferenceClient,
  connectorId,
  entity,
  contextForEntityInvestigation,
  entityDescription,
  logPatternDescription,
}: {
  inferenceClient: InferenceClient;
  connectorId: string;
  entity: Record<string, string>;
  contextForEntityInvestigation: string;
  entityDescription: string;
  logPatternDescription: LogPatternDescription;
}): Promise<string> {
  const system = RCA_SYSTEM_PROMPT_BASE;

  const shouldGenerateTimeline = logPatternDescription.interestingPatternCount > 0;
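
  // Only request a significant-events timeline when the log pattern analysis
  // surfaced interesting patterns; otherwise the report skips that section.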
  let input = `${getInvestigateEntityTaskPrompt({ entity, contextForEntityInvestigation })}

## Entity description

${toBlockquote(entityDescription)}

## Log pattern analysis

${toBlockquote(logPatternDescription.content)}

# Current task

Your current task is to write a report on the investigation into ${formatEntity(entity)}.
The log pattern analysis and entity description will be added to your report (at the
top), so you don't need to repeat anything in it.`;

  if (shouldGenerateTimeline) {
    input += `${RCA_PROMPT_SIGNIFICANT_EVENTS}\n\n`;
  }

  input += `## Context and reasoning

Reason about the role that the entity plays in the investigation, given the context.
Mention evidence (hard pieces of data) when reasoning.

Do not suggest next steps - this will happen in a follow-up task.`;

  if (shouldGenerateTimeline) {
    input += `## Format

Your reply should only contain two sections:

- Timeline of significant events
- Context and reasoning
`;
  } else {
    input += `## Format
Your reply should only contain one section:
- Context and reasoning
`;
  }

  const response = await inferenceClient.output({
    id: 'generate_entity_report',
    connectorId,
    input,
    system,
  });

  return response.content;
}

@@ -0,0 +1,191 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { RCA_PROMPT_TIMELINE_GUIDE, RCA_SYSTEM_PROMPT_BASE } from '../../prompts';
import { RootCauseAnalysisContext } from '../../types';
import { stringifySummaries } from '../../util/stringify_summaries';

const SYSTEM_PROMPT_ADDENDUM = `
# Guide: Writing a Root Cause Analysis (RCA) Report

A Root Cause Analysis (RCA) report is the final step in a thorough
investigation. Its purpose is to provide a clear, evidence-backed explanation of
the underlying cause of an issue, as well as the impact. Even if no definitive
root cause is identified, the report should reflect the findings, the hypotheses
considered, and why certain assumptions were rejected. This guide will help
structure an RCA that distinguishes between cause and effect, organizes
evidence, and presents a timeline of key events.

---

## 1. Introduction

Start by summarizing the reason for the investigation. Provide a brief overview
of the incident, the affected services or entities, and the initial alerts or
issues that triggered the investigation.

- **What prompted the investigation?**
- **Which entities were investigated?**
- **Was there a specific hypothesis proposed at the outset?**

### Example:
- **Overview:** This RCA report investigates the elevated error rates in
\`myservice\` and its downstream dependencies, first identified through an SLO
breach for the \`/api/submit\` endpoint. The investigation considered multiple
entities and possible causes, including resource exhaustion and upstream service
failures.

---

## 2. Investigation Summary

Summarize the key steps of the investigation, outlining:
- **What hypotheses were proposed and why.**
- **Which entities were investigated (e.g., \`myservice\`, \`myotherservice\`,
\`notification-service\`).**
- **Which hypotheses were discarded and why.**

For each hypothesis, present the supporting or contradicting evidence.

- **Strong Indicators:** Clear, repeated evidence pointing toward or against a
hypothesis.
- **Weak Indicators:** Inconsistent or ambiguous data that did not provide
conclusive answers.

#### Example Format:
- **Hypothesis 1:** Resource exhaustion in \`myservice\` caused elevated error
rates.
  - **Evidence:**
    - **Strong:** Memory usage exceeded 90% during the incident.
    - **Weak:** CPU usage remained stable, making resource exhaustion a partial
explanation.

- **Hypothesis 2:** Upstream latency from \`myotherservice\` caused delays.
  - **Evidence:**
    - **Strong:** API logs showed frequent retries and timeouts from
\`myotherservice\`.
    - **Weak:** No errors were observed in \`myotherservice\` logs, suggesting an
issue isolated to \`myservice\`.

---

## 3. Cause and Effect

Differentiate between the **cause** (what initiated the issue) and the
**effect** (the impact or symptoms seen across the system). The cause should
focus on the root, while the effect describes the wider system response or
failure.

- **Root Cause:** Identify the underlying problem, supported by strong evidence.
If no root cause is found, clearly state that the investigation did not lead to
a conclusive root cause.

- **Impact:** Describe the downstream effects on other services, performance
degradation, or SLO violations.

#### Example:
- **Cause:** The root cause of the elevated error rate was identified as a
memory leak in \`myservice\` that gradually led to resource exhaustion.
- **Effect:** This led to elevated latency and increased error rates at the
\`/api/submit\` endpoint, impacting downstream services like
\`notification-service\` that rely on responses from \`myservice\`.

---

## 4. Evidence for Root Cause

Present a structured section summarizing all the evidence that supports the
identified root cause. If no root cause is identified, outline the most
significant findings that guided or limited the investigation.

- **Log Patterns:** Describe any abnormal log patterns observed, including
notable change points.
- **Alerts and SLOs:** Mention any alerts or breached SLOs that were triggered,
including their relevance to the investigation.
- **Data Analysis:** Include any data trends or patterns that were analyzed
(e.g., resource usage spikes, network traffic).

#### Example:
- **Memory Usage:** Logs showed a steady increase in memory consumption starting
at 10:00 AM, peaking at 12:00 PM, where memory usage surpassed 90%, triggering
the alert.
- **Error Rate Logs:** Error rates for \`/api/submit\` began increasing around
11:30 AM, correlating with the memory pressure in \`myservice\`.
- **API Logs:** \`myotherservice\` API logs showed no internal errors, ruling out
an upstream dependency as the primary cause.

---

## 5. Proposed Impact

Even if the root cause is clear, it is important to mention the impact of the
issue on the system, users, and business operations. This includes:
- **Affected Services:** Identify the services impacted (e.g., downstream
dependencies).
- **Performance Degradation:** Describe any SLO breaches or performance
bottlenecks.
- **User Impact:** Explain how users or clients were affected (e.g., higher
latency, failed transactions).

#### Example:
- **Impact:** The memory leak in \`myservice\` caused service degradation over a
2-hour window. This affected \`/api/submit\`, causing delays and failed
requests, ultimately impacting user-facing services relying on that endpoint.

---

## 6. Timeline of Significant Events

${RCA_PROMPT_TIMELINE_GUIDE}

---

## 7. Conclusion and Next Steps

Summarize the conclusions of the investigation:
- If a root cause was identified, confirm it with the strongest supporting
evidence.
- If no root cause was found, state that clearly and suggest areas for further
investigation or monitoring.

Finally, outline the next steps:
- **Fixes or Mitigations:** Recommend any immediate actions (e.g., patch
deployment, configuration changes).
- **Monitoring Improvements:** Suggest new alerts or monitoring metrics based on
lessons learned.
- **Further Investigations:** If necessary, propose any follow-up investigations
to gather more evidence.

#### Example:
- **Conclusion:** The root cause of the incident was a memory leak in
\`myservice\`, leading to resource exhaustion and elevated error rates at
\`/api/submit\`. The leak has been patched, and monitoring has been improved to
detect memory spikes earlier.
- **Next Steps:** Monitor memory usage for the next 24 hours to ensure no
recurrence. Investigate adding a memory ceiling for \`myservice\` to prevent
future resource exhaustion.`;

export async function writeFinalReport({
  rcaContext,
}: {
  rcaContext: RootCauseAnalysisContext;
}): Promise<string> {
  const { inferenceClient, connectorId } = rcaContext;

  return await inferenceClient
    .output({
      id: 'write_final_report',
      connectorId,
      system: `${RCA_SYSTEM_PROMPT_BASE}

${SYSTEM_PROMPT_ADDENDUM}`,
      input: `Write the RCA report, based on the observations.

${stringifySummaries(rcaContext)}`,
    })
    .then((event) => event.content);
}

@@ -0,0 +1,77 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import {
  RCA_END_PROCESS_TOOL_NAME,
  RCA_INVESTIGATE_ENTITY_TOOL_NAME,
  RCA_OBSERVE_TOOL_NAME,
} from '@kbn/observability-ai-common/root_cause_analysis/tool_names';

export const RCA_TOOLS = {
  [RCA_OBSERVE_TOOL_NAME]: {
    description: `Request an observation from another agent on
the results of the returned investigations. The title should
cover key new observations from the initial context or
completed investigations, not anything about next steps.`,
    schema: {
      type: 'object',
      properties: {
        title: {
          type: 'string',
          description: `A short title w/ the key new observations that will be displayed on top of a collapsible panel.`,
        },
      },
      required: ['title'],
    },
  },
  [RCA_END_PROCESS_TOOL_NAME]: {
    description: `End the RCA process by requesting a
written report from another agent`,
    schema: {
      type: 'object',
      properties: {
        endProcess: {
          type: 'boolean',
        },
      },
      required: ['endProcess'],
    },
  },
  [RCA_INVESTIGATE_ENTITY_TOOL_NAME]: {
    description: `Investigate an entity`,
    schema: {
      type: 'object',
      properties: {
        context: {
          type: 'string',
          description: `Context that will be used in the investigation of the entity. Mention the initial context
of the investigation, a very short summary of the last observation if applicable, and pieces
of data that can be relevant for the investigation into the entity, such as timestamps or
keywords`,
        },
        entity: {
          type: 'object',
          description: `The entity you want to investigate, such as a service. Use
the Elasticsearch field names and values. For example, for services, use
the following structure: ${JSON.stringify({
            entity: { field: 'service.name', value: 'opbeans-java' },
          })}`,
          properties: {
            field: {
              type: 'string',
            },
            value: {
              type: 'string',
            },
          },
          required: ['field', 'value'],
        },
      },
      required: ['context', 'entity'],
    },
  },
} as const;

@@ -0,0 +1,101 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import {
  ToolMessage,
  UserMessage,
  ToolCallsOf,
  ToolChoice,
  AssistantMessageOf,
} from '@kbn/inference-common';
import { InferenceClient } from '@kbn/inference-plugin/server';
import { Logger } from '@kbn/logging';
import { AlertsClient } from '@kbn/rule-registry-plugin/server';
import { RulesClient } from '@kbn/alerting-plugin/server';
import { ObservabilityAIAssistantClient } from '@kbn/observability-ai-assistant-plugin/server';
import { ObservabilityElasticsearchClient } from '@kbn/observability-utils-server/es/client/create_observability_es_client';
import {
  RCA_END_PROCESS_TOOL_NAME,
  RCA_INVESTIGATE_ENTITY_TOOL_NAME,
  RCA_OBSERVE_TOOL_NAME,
} from '@kbn/observability-ai-common/root_cause_analysis';
import { ObservationStepSummary } from './tasks/observe_investigation_results';
import { EntityInvestigation } from './tasks/investigate_entity';
import { SignificantEventsTimeline } from './tasks/generate_timeline';
import { RCA_TOOLS } from './tools';

export type EndProcessToolMessage = ToolMessage<
  typeof RCA_END_PROCESS_TOOL_NAME,
  {
    report: string;
    timeline: SignificantEventsTimeline;
  }
>;

export type ObservationToolMessage = ToolMessage<
  typeof RCA_OBSERVE_TOOL_NAME,
  {
    content: string;
  },
  ObservationStepSummary
>;

export type InvestigateEntityToolMessage = ToolMessage<
  typeof RCA_INVESTIGATE_ENTITY_TOOL_NAME,
  Pick<EntityInvestigation, 'entity' | 'summary' | 'relatedEntities'>,
  { attachments: EntityInvestigation['attachments'] }
>;

export type ToolErrorMessage = ToolMessage<
  'error',
  {
    error: {
      message: string;
    };
  }
>;

export type RootCauseAnalysisEvent =
  | RootCauseAnalysisToolMessage
  | ToolErrorMessage
  | UserMessage
  | AssistantMessageOf<{
      tools: typeof RCA_TOOLS;
      toolChoice?: ToolChoice<keyof typeof RCA_TOOLS>;
    }>;

export type RootCauseAnalysisToolRequest<
  TToolName extends keyof typeof RCA_TOOLS = keyof typeof RCA_TOOLS
> = ToolCallsOf<{
  tools: Pick<typeof RCA_TOOLS, TToolName>;
}>['toolCalls'][number];

export type RootCauseAnalysisToolMessage =
  | EndProcessToolMessage
  | InvestigateEntityToolMessage
  | ObservationToolMessage;

export interface RootCauseAnalysisContext {
  initialContext: string;
  start: number;
  end: number;
  events: RootCauseAnalysisEvent[];
  indices: {
    logs: string[];
    traces: string[];
    sloSummaries: string[];
  };
  inferenceClient: InferenceClient;
  tokenLimit: number;
  connectorId: string;
  esClient: ObservabilityElasticsearchClient;
  alertsClient: AlertsClient;
  rulesClient: RulesClient;
  logger: Logger;
  spaceId: string;
  observabilityAIAssistantClient: ObservabilityAIAssistantClient;
}

@@ -0,0 +1,177 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import {
  Message,
  ToolDefinition,
  ToolChoice,
  ToolCallsOf,
  withoutChunkEvents,
  withoutTokenCountEvents,
  ToolMessage,
  MessageOf,
  MessageRole,
} from '@kbn/inference-common';
import { InferenceClient } from '@kbn/inference-plugin/server';
import { Logger } from '@kbn/logging';
import {
  defer,
  last,
  merge,
  Observable,
  of,
  OperatorFunction,
  share,
  switchMap,
  toArray,
} from 'rxjs';

interface CallToolOptions extends CallToolTools {
  system: string;
  messages: Message[];
  inferenceClient: InferenceClient;
  connectorId: string;
  logger: Logger;
}

interface CallToolTools {
  tools: Record<string, ToolDefinition>;
  toolChoice?: ToolChoice;
}

type CallbackOf<
  TCallToolTools extends CallToolTools,
  TEmittedMessage extends Message
> = (parameters: {
  messages: Message[];
  toolCalls: ToolCallsOf<TCallToolTools>['toolCalls'];
}) => Observable<TEmittedMessage>;

type GetNextRequestCallback<TCallToolTools extends CallToolTools> = ({
  messages,
  system,
}: {
  messages: Message[];
  system: string;
}) => { system: string; messages: Message[] } & TCallToolTools;

export function callTools<TCallToolOptions extends CallToolOptions>(
  { system, messages, inferenceClient, connectorId, tools, toolChoice, logger }: TCallToolOptions,
  callback: CallbackOf<TCallToolOptions, ToolMessage>
): Observable<MessageOf<TCallToolOptions>>;

export function callTools<
  TCallToolOptions extends Omit<CallToolOptions, 'tools' | 'toolChoice'> = never,
  TCallToolTools extends CallToolTools = never,
  TEmittedMessage extends Message = never
>(
  options: TCallToolOptions,
  getNextRequest: GetNextRequestCallback<TCallToolTools>,
  callback: CallbackOf<TCallToolTools, TEmittedMessage>
): Observable<TEmittedMessage>;

export function callTools(
  { system, messages, inferenceClient, connectorId, tools, toolChoice, logger }: CallToolOptions,
  ...callbacks:
    | [GetNextRequestCallback<CallToolTools>, CallbackOf<CallToolOptions, ToolMessage>]
    | [CallbackOf<CallToolTools, ToolMessage>]
): Observable<Message> {
  const callback = callbacks.length === 2 ? callbacks[1] : callbacks[0];

  const getNextRequest =
    callbacks.length === 2
      ? callbacks[0]
      : (next: { messages: Message[]; system: string }) => {
          return {
            ...next,
            tools,
            toolChoice,
          };
        };

  const nextRequest = getNextRequest({ system, messages });

  const chatComplete$ = defer(() =>
    inferenceClient.chatComplete({
      connectorId,
      stream: true,
      ...nextRequest,
    })
  );

  const asCompletedMessages$ = chatComplete$.pipe(
    withoutChunkEvents(),
    withoutTokenCountEvents(),
    switchMap((event) => {
      return of({
        role: MessageRole.Assistant as const,
        content: event.content,
        toolCalls: event.toolCalls,
      });
    })
  );

  const withToolResponses$ = asCompletedMessages$
    .pipe(
      switchMap((message) => {
        if (message.toolCalls.length) {
          return merge(
            of(message),
            callback({ toolCalls: message.toolCalls, messages: messages.concat(message) })
          );
        }
        return of(message);
      })
    )
    .pipe(handleNext());

  return withToolResponses$;
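
  // handleNext re-emits every message and, once the source completes, recurses
  // via callTools with the accumulated messages until the conversation ends on
  // an assistant message with no pending tool responses.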
  function handleNext(): OperatorFunction<Message, Message> {
    return (source$) => {
      const shared$ = source$.pipe(share());

      const next$ = merge(
        shared$,
        shared$.pipe(
          toArray(),
          last(),
          switchMap((nextMessages) => {
            logger.debug(() =>
              JSON.stringify(
                nextMessages.map((message) => {
                  return {
                    role: message.role,
                    toolCalls: 'toolCalls' in message ? message.toolCalls : undefined,
                    toolCallId: 'toolCallId' in message ? message.toolCallId : undefined,
                  };
                })
              )
            );

            if (nextMessages[nextMessages.length - 1].role !== MessageRole.Assistant) {
              const options: CallToolOptions = {
                system,
                connectorId,
                inferenceClient,
                messages: messages.concat(nextMessages),
                tools,
                toolChoice,
                logger,
              };
              const after$ = callTools(options, getNextRequest, callback);
              return after$;
            }
            return of();
          })
        )
      );

      return next$;
    };
  }
}

@@ -0,0 +1,97 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { encode } from 'gpt-tokenizer';
import { uniqueId } from 'lodash';

interface TextWithId {
  id: string;
  text: string;
}

interface Parameters {
  system: string;
  input: string;
  tokenLimit: number;
}

interface ChunkedOutputRequest {
  input: string;
  system: string;
}

export function chunkOutputCalls({}: Parameters & { texts: string[] }): Array<
  ChunkedOutputRequest & {
    texts: string[];
  }
>;

export function chunkOutputCalls({}: Parameters & { texts: TextWithId[] }): Array<
  ChunkedOutputRequest & {
    texts: TextWithId[];
  }
>;

export function chunkOutputCalls({
  system,
  input,
  texts,
  tokenLimit,
}: Parameters & {
  texts: string[] | TextWithId[];
}) {
  const inputAndSystemPromptCount = encode(system).length + encode(input).length;

  if (!texts.length) {
    return [{ system, input, texts: [] }];
  }

  const textWithIds = texts.map((text) => {
    if (typeof text === 'string') {
      return {
        id: uniqueId(),
        text,
      };
    }
    return text;
  });

  const textsWithCount = textWithIds.map(({ text, id }) => ({
    tokenCount: encode(text).length,
    text,
    id,
  }));

  const chunks: Array<{ tokenCount: number; texts: TextWithId[] }> = [];
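
  // First-fit bin packing: each text goes into the first chunk with room left;
  // every new chunk starts with the shared system + input prompt already counted.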
  textsWithCount.forEach(({ text, id, tokenCount }) => {
    let chunkWithRoomLeft = chunks.find((chunk) => {
      return chunk.tokenCount + tokenCount <= tokenLimit;
    });

    if (!chunkWithRoomLeft) {
      chunkWithRoomLeft = { texts: [], tokenCount: inputAndSystemPromptCount };
      chunks.push(chunkWithRoomLeft);
    }
    chunkWithRoomLeft.texts.push({ text, id });
    chunkWithRoomLeft.tokenCount += tokenCount;
  });

  const hasTextWithIds = texts.some((text) => typeof text !== 'string');

  return chunks.map((chunk) => {
    const textsForChunk = hasTextWithIds
      ? chunk.texts
      : chunk.texts.map((text) => (typeof text === 'string' ? text : text.text));

    return {
      system,
      input,
      texts: textsForChunk,
    };
  });
}

@@ -0,0 +1,12 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

export function formatEntity(entity: Record<string, string>) {
  return Object.entries(entity)
    .map(([field, value]) => `${field}:${value}`)
    .join('/');
}

@@ -0,0 +1,22 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { MessageRole } from '@kbn/inference-common';
import { RCA_INVESTIGATE_ENTITY_TOOL_NAME } from '@kbn/observability-ai-common/root_cause_analysis';
import { InvestigateEntityToolMessage, RootCauseAnalysisContext } from '../types';

export function getPreviouslyInvestigatedEntities({
  events,
}: Pick<RootCauseAnalysisContext, 'events'>) {
  const investigationToolResponses = events.filter(
    (event): event is InvestigateEntityToolMessage => {
      return event.role === MessageRole.Tool && event.name === RCA_INVESTIGATE_ENTITY_TOOL_NAME;
    }
  );

  return investigationToolResponses.map((event) => event.response.entity);
}

@@ -0,0 +1,34 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { ScoredKnowledgeBaseEntry } from '../tasks/get_knowledge_base_entries';
import { toBlockquote } from './to_blockquote';

export function serializeKnowledgeBaseEntries(entries: ScoredKnowledgeBaseEntry[]) {
  if (!entries.length) {
    return `## Knowledge base

No relevant knowledge base entries were found.
`;
  }
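
  // Only entries scored 3 or higher by the relevance-scoring step make it into
  // the serialized prompt; lower-scored entries are dropped as noise.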
  const serializedEntries = entries
    .filter((entry) => entry.score >= 3)
    .map(
      (entry) => `## Entry \`${entry.id}\` (score: ${entry.score}, ${
        entry.truncated ? `truncated` : `not truncated`
      })

${toBlockquote(entry.text)}`
    );

  return `## Knowledge base

The following relevant entries were found in the knowledge base:

${serializedEntries.join('\n\n')}`;
}

@@ -0,0 +1,47 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { compact } from 'lodash';
import { MessageRole } from '@kbn/inference-common';
import { RCA_OBSERVE_TOOL_NAME } from '@kbn/observability-ai-common/root_cause_analysis';
import { formatEntity } from './format_entity';
import { toBlockquote } from './to_blockquote';
import { ObservationToolMessage, RootCauseAnalysisContext } from '../types';

export function stringifySummaries({ events }: RootCauseAnalysisContext): string {
  const summaries = events
    .filter((event): event is ObservationToolMessage => {
      return event.role === MessageRole.Tool && event.name === RCA_OBSERVE_TOOL_NAME;
    })
    .map((event) => event.data);

  if (!summaries.length) {
    return `# Previous observations

No previous observations`;
  }

  return `# Previous observations

${summaries
  .map((summary, index) => {
    const header = `## Observation #${index + 1}`;

    const entitiesHeader = summary.investigations.length
      ? `### Investigated entities

${summary.investigations
  .map((investigation) => `- ${formatEntity(investigation.entity)}`)
  .join('\n')}`
      : undefined;

    const summaryBody = `### Summary

${toBlockquote(summary.content)}`;

    return compact([header, entitiesHeader, summaryBody]).join('\n\n');
  })
  .join('\n\n')}`;
}
|
|
@ -0,0 +1,13 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
export function toBlockquote(input: string): string {
|
||||
return input
|
||||
.split('\n')
|
||||
.map((line) => `> ${line}`)
|
||||
.join('\n');
|
||||
}

@@ -0,0 +1,124 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { MessageRole, ToolCallsOf } from '@kbn/inference-common';
import { entityQuery } from '@kbn/observability-utils-common/es/queries/entity_query';
import { RCA_INVESTIGATE_ENTITY_TOOL_NAME } from '@kbn/observability-ai-common/root_cause_analysis';
import { isEqual } from 'lodash';
import { getEntitiesByFuzzySearch } from '@kbn/observability-utils-server/entities/get_entities_by_fuzzy_search';
import { RCA_TOOLS } from '../tools';
import {
  InvestigateEntityToolMessage,
  RootCauseAnalysisContext,
  RootCauseAnalysisToolRequest,
} from '../types';
import { formatEntity } from './format_entity';

interface EntityExistsResultExists {
  exists: true;
  entity: Record<string, string>;
}

interface EntityExistsResultDoesNotExist {
  exists: false;
  entity: Record<string, string>;
  suggestions: string[];
}

type EntityExistsResult = EntityExistsResultExists | EntityExistsResultDoesNotExist;

export async function validateInvestigateEntityToolCalls({
  rcaContext,
  toolCalls,
}: {
  rcaContext: Pick<RootCauseAnalysisContext, 'esClient' | 'indices' | 'start' | 'end' | 'events'>;
  toolCalls: RootCauseAnalysisToolRequest[];
}) {
  const { events, esClient, indices, start, end } = rcaContext;

  const previouslyInvestigatedEntities = events
    .filter(
      (event): event is InvestigateEntityToolMessage =>
        event.role === MessageRole.Tool && event.name === RCA_INVESTIGATE_ENTITY_TOOL_NAME
    )
    .map((toolResponse) => toolResponse.response.entity);

  const investigateEntityToolCalls = toolCalls.filter(
    (
      toolCall
    ): toolCall is ToolCallsOf<{
      tools: Pick<typeof RCA_TOOLS, typeof RCA_INVESTIGATE_ENTITY_TOOL_NAME>;
    }>['toolCalls'][number] => toolCall.function.name === RCA_INVESTIGATE_ENTITY_TOOL_NAME
  );

  if (!investigateEntityToolCalls.length) {
    return [];
  }

  const entitiesToInvestigate = investigateEntityToolCalls.map((toolCall) => {
    const { entity: entityToInvestigate } = toolCall.function.arguments;
    return {
      [entityToInvestigate.field]: entityToInvestigate.value,
    };
  });
  const entityExistsResponses: EntityExistsResult[] = await Promise.all(
    entitiesToInvestigate.map(async (entity) => {
      const response = await esClient.search('find_data_for_entity', {
        track_total_hits: 1,
        size: 0,
        timeout: '1ms',
        index: indices.logs.concat(indices.traces),
        query: {
          bool: {
            filter: [...entityQuery(entity)],
          },
        },
      });

      const exists = response.hits.total.value > 0;
      if (!exists) {
        return getEntitiesByFuzzySearch({
          start,
          end,
          esClient,
          index: indices.logs.concat(indices.traces),
          entity,
        }).then((suggestions) => {
          return {
            entity,
            exists,
            suggestions,
          };
        });
      }

      return { entity, exists };
    })
  );

  const alreadyInvestigatedEntities = entitiesToInvestigate.filter((entity) => {
    return previouslyInvestigatedEntities.some((prevEntity) => isEqual(entity, prevEntity));
  });

  const errors = [
    ...entityExistsResponses
      .filter(
        (entityExistsResult): entityExistsResult is EntityExistsResultDoesNotExist =>
          !entityExistsResult.exists
      )
      .map(({ suggestions, entity }) => {
        return `Entity ${formatEntity(
          entity
        )} does not exist. Did you mean one of ${suggestions.join(', ')}?`;
      }),
    ...alreadyInvestigatedEntities.map((entity) => {
      return `Entity ${formatEntity(entity)} was already investigated before.`;
    }),
  ];

  return errors;
}
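
As a rough sketch of how this validator is meant to slot into the analysis loop (the surrounding orchestration names here are assumptions, not part of the diff):

const errors = await validateInvestigateEntityToolCalls({ rcaContext, toolCalls });
if (errors.length) {
  // Feed the validation errors back to the LLM as tool responses so it can
  // correct the entity (or skip one it already investigated) on the next turn.
  respondWithErrors(errors); // hypothetical helper
}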

@@ -0,0 +1,29 @@
{
  "extends": "../../../../../tsconfig.base.json",
  "compilerOptions": {
    "outDir": "target/types",
    "types": [
      "jest",
      "node",
      "react"
    ]
  },
  "include": [
    "**/*.ts"
  ],
  "exclude": [
    "target/**/*"
  ],
  "kbn_references": [
    "@kbn/observability-utils-common",
    "@kbn/alerting-plugin",
    "@kbn/rule-registry-plugin",
    "@kbn/inference-plugin",
    "@kbn/logging",
    "@kbn/calculate-auto",
    "@kbn/observability-ai-assistant-plugin",
    "@kbn/inference-common",
    "@kbn/observability-ai-common",
    "@kbn/observability-utils-server",
  ]
}

@@ -0,0 +1,50 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { castArray, orderBy } from 'lodash';
import Fuse from 'fuse.js';
import { ObservabilityElasticsearchClient } from '../es/client/create_observability_es_client';

export async function getEntitiesByFuzzySearch({
  esClient,
  entity,
  start,
  end,
  index,
}: {
  esClient: ObservabilityElasticsearchClient;
  entity: Record<string, string>;
  start: number;
  end: number;
  index: string | string[];
}): Promise<string[]> {
  if (Object.keys(entity).length > 1) {
    return [];
  }

  const [field, value] = Object.entries(entity)[0];

  const { terms } = await esClient.client.termsEnum({
    index: castArray(index).join(','),
    field,
    index_filter: {
      range: {
        '@timestamp': {
          gte: new Date(start).toISOString(),
          lte: new Date(end).toISOString(),
        },
      },
    },
    size: 10_000,
  });

  const results = new Fuse(terms, { includeScore: true, threshold: 0.75 }).search(value);

  return orderBy(results, (result) => result.score, 'asc')
    .slice(0, 5)
    .map((result) => result.item);
}
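
A minimal standalone sketch of the fuzzy-matching step above, assuming the candidate terms were already fetched via the terms enum API (values are illustrative):

import Fuse from 'fuse.js';
import { orderBy } from 'lodash';

const terms = ['cart-service', 'cartservice', 'checkout-service'];
const results = new Fuse(terms, { includeScore: true, threshold: 0.75 }).search('cart_service');
// Lower Fuse.js scores mean closer matches, so sort ascending and keep the top five.
const suggestions = orderBy(results, (result) => result.score, 'asc')
  .slice(0, 5)
  .map((result) => result.item);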

@@ -0,0 +1,405 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import {
  AggregationsCategorizeTextAggregation,
  AggregationsDateHistogramAggregation,
  AggregationsMaxAggregation,
  AggregationsMinAggregation,
  AggregationsTopHitsAggregation,
  QueryDslQueryContainer,
} from '@elastic/elasticsearch/lib/api/types';
import { categorizationAnalyzer } from '@kbn/aiops-log-pattern-analysis/categorization_analyzer';
import { ChangePointType } from '@kbn/es-types/src';
import { pValueToLabel } from '@kbn/observability-utils-common/ml/p_value_to_label';
import { calculateAuto } from '@kbn/calculate-auto';
import { omit, orderBy, uniqBy } from 'lodash';
import moment from 'moment';
import { ObservabilityElasticsearchClient } from '../es/client/create_observability_es_client';
import { kqlQuery } from '../es/queries/kql_query';
import { rangeQuery } from '../es/queries/range_query';

interface FieldPatternResultBase {
  field: string;
  count: number;
  pattern: string;
  regex: string;
  sample: string;
  firstOccurrence: string;
  lastOccurrence: string;
  highlight: Record<string, string[]>;
  metadata: Record<string, unknown[]>;
}

interface FieldPatternResultChanges {
  timeseries: Array<{ x: number; y: number }>;
  change: {
    timestamp?: string;
    significance: 'high' | 'medium' | 'low' | null;
    type: ChangePointType;
    change_point?: number;
    p_value?: number;
  };
}

export type FieldPatternResult<TChanges extends boolean | undefined = undefined> =
  FieldPatternResultBase & (TChanges extends true ? FieldPatternResultChanges : {});

export type FieldPatternResultWithChanges = FieldPatternResult<true>;

interface CategorizeTextOptions {
  query: QueryDslQueryContainer;
  metadata: string[];
  esClient: ObservabilityElasticsearchClient;
  samplingProbability: number;
  fields: string[];
  index: string | string[];
  useMlStandardTokenizer: boolean;
  size: number;
  start: number;
  end: number;
}
// eslint-disable-next-line @typescript-eslint/consistent-type-definitions
type CategorizeTextSubAggregations = {
  sample: { top_hits: AggregationsTopHitsAggregation };
  minTimestamp: { min: AggregationsMinAggregation };
  maxTimestamp: { max: AggregationsMaxAggregation };
};

interface CategorizeTextAggregationResult {
  categorize_text: AggregationsCategorizeTextAggregation;
  aggs: CategorizeTextSubAggregations &
    (
      | {}
      | {
          timeseries: { date_histogram: AggregationsDateHistogramAggregation };
          changes: { change_point: { buckets_path: string } };
        }
    );
}

export async function runCategorizeTextAggregation<
  TChanges extends boolean | undefined = undefined
>(
  options: CategorizeTextOptions & { includeChanges?: TChanges }
): Promise<Array<FieldPatternResult<TChanges>>>;

export async function runCategorizeTextAggregation({
  esClient,
  fields,
  metadata,
  index,
  query,
  samplingProbability,
  useMlStandardTokenizer,
  includeChanges,
  size,
  start,
  end,
}: CategorizeTextOptions & { includeChanges?: boolean }): Promise<
  Array<FieldPatternResult<boolean>>
> {
  const aggs = Object.fromEntries(
    fields.map((field): [string, CategorizeTextAggregationResult] => [
      field,
      {
        categorize_text: {
          field,
          min_doc_count: 1,
          size,
          categorization_analyzer: useMlStandardTokenizer
            ? {
                tokenizer: 'ml_standard',
                char_filter: [
                  {
                    type: 'pattern_replace',
                    pattern: '\\\\n',
                    replacement: '',
                  } as unknown as string,
                ],
              }
            : categorizationAnalyzer,
        },
        aggs: {
          minTimestamp: {
            min: {
              field: '@timestamp',
            },
          },
          maxTimestamp: {
            max: {
              field: '@timestamp',
            },
          },
          ...(includeChanges
            ? {
                timeseries: {
                  date_histogram: {
                    field: '@timestamp',
                    min_doc_count: 0,
                    extended_bounds: {
                      min: start,
                      max: end,
                    },
                    fixed_interval: `${calculateAuto
                      .atLeast(30, moment.duration(end - start, 'ms'))!
                      .asMilliseconds()}ms`,
                  },
                },
                changes: {
                  change_point: {
                    buckets_path: 'timeseries>_count',
                  },
                },
              }
            : {}),
          sample: {
            top_hits: {
              size: 1,
              _source: false,
              fields: [field, ...metadata],
              sort: {
                _score: {
                  order: 'desc',
                },
              },
              highlight: {
                fields: {
                  '*': {},
                },
              },
            },
          },
        },
      },
    ])
  );

  const response = await esClient.search('get_log_patterns', {
    index,
    size: 0,
    track_total_hits: false,
    query: {
      bool: {
        filter: [query, ...rangeQuery(start, end)],
      },
    },
    aggregations: {
      sampler: {
        random_sampler: {
          probability: samplingProbability,
        },
        aggs,
      },
    },
  });

  if (!response.aggregations) {
    return [];
  }

  const fieldAggregates = omit(response.aggregations.sampler, 'seed', 'doc_count', 'probability');

  return Object.entries(fieldAggregates).flatMap(([fieldName, aggregate]) => {
    const buckets = aggregate.buckets;

    return buckets.map((bucket) => {
      return {
        field: fieldName,
        count: bucket.doc_count,
        pattern: bucket.key,
        regex: bucket.regex,
        sample: bucket.sample.hits.hits[0].fields![fieldName][0] as string,
        highlight: bucket.sample.hits.hits[0].highlight ?? {},
        metadata: bucket.sample.hits.hits[0].fields!,
        firstOccurrence: new Date(bucket.minTimestamp.value!).toISOString(),
        lastOccurrence: new Date(bucket.maxTimestamp.value!).toISOString(),
        ...('timeseries' in bucket
          ? {
              timeseries: bucket.timeseries.buckets.map((dateBucket) => ({
                x: dateBucket.key,
                y: dateBucket.doc_count,
              })),
              change: Object.entries(bucket.changes.type).map(
                ([changePointType, change]): FieldPatternResultChanges['change'] => {
                  return {
                    type: changePointType as ChangePointType,
                    significance:
                      change.p_value !== undefined ? pValueToLabel(change.p_value) : null,
                    change_point: change.change_point,
                    p_value: change.p_value,
                    timestamp:
                      change.change_point !== undefined
                        ? bucket.timeseries.buckets[change.change_point].key_as_string
                        : undefined,
                  };
                }
              )[0],
            }
          : {}),
      };
    });
  });
}

interface LogPatternOptions {
  esClient: ObservabilityElasticsearchClient;
  start: number;
  end: number;
  index: string | string[];
  kuery: string;
  metadata?: string[];
  fields: string[];
}

export async function getLogPatterns<TChanges extends boolean | undefined = undefined>(
  options: LogPatternOptions & { includeChanges?: TChanges }
): Promise<Array<FieldPatternResult<TChanges>>>;

export async function getLogPatterns({
  esClient,
  start,
  end,
  index,
  kuery,
  includeChanges,
  metadata = [],
  fields,
}: LogPatternOptions & { includeChanges?: boolean }): Promise<Array<FieldPatternResult<boolean>>> {
  const fieldCapsResponse = await esClient.fieldCaps('get_field_caps_for_log_pattern_analysis', {
    fields,
    index_filter: {
      bool: {
        filter: [...rangeQuery(start, end)],
      },
    },
    index,
    types: ['text', 'match_only_text'],
  });

  const fieldsInFieldCaps = Object.keys(fieldCapsResponse.fields);

  if (!fieldsInFieldCaps.length) {
    return [];
  }

  const totalDocsResponse = await esClient.search('get_total_docs_for_log_pattern_analysis', {
    index,
    size: 0,
    track_total_hits: true,
    query: {
      bool: {
        filter: [...kqlQuery(kuery), ...rangeQuery(start, end)],
      },
    },
  });

  const totalHits = totalDocsResponse.hits.total.value;

  if (totalHits === 0) {
    return [];
  }

  let samplingProbability = 100_000 / totalHits;

  if (samplingProbability >= 0.5) {
    samplingProbability = 1;
  }

  const fieldGroups = includeChanges
    ? fieldsInFieldCaps.map((field) => [field])
    : [fieldsInFieldCaps];

  const allPatterns = await Promise.all(
    fieldGroups.map(async (fieldGroup) => {
      const topMessagePatterns = await runCategorizeTextAggregation({
        esClient,
        index,
        fields: fieldGroup,
        query: {
          bool: {
            filter: kqlQuery(kuery),
          },
        },
        samplingProbability,
        useMlStandardTokenizer: false,
        size: 100,
        start,
        end,
        includeChanges,
        metadata,
      });

      if (topMessagePatterns.length === 0) {
        return [];
      }

      const patternsToExclude = topMessagePatterns.filter((pattern) => {
        // Elasticsearch will reject the exclusion query if it becomes too complex;
        // the number of capture groups in the regex is used as a proxy for that complexity.
        const complexity = pattern.regex.match(/(\.\+\?)|(\.\*\?)/g)?.length ?? 0;
        return (
          complexity <= 25 &&
          // anything less than 50 messages should be re-processed with the ml_standard tokenizer
          pattern.count > 50
        );
      });

      const rareMessagePatterns = await runCategorizeTextAggregation({
        esClient,
        index,
        fields: fieldGroup,
        start,
        end,
        query: {
          bool: {
            filter: kqlQuery(kuery),
            must_not: [
              ...patternsToExclude.map((pattern) => {
                return {
                  bool: {
                    filter: [
                      {
                        regexp: {
                          [pattern.field]: {
                            value: pattern.regex,
                          },
                        },
                      },
                      {
                        match: {
                          [pattern.field]: {
                            query: pattern.pattern,
                            fuzziness: 0,
                            operator: 'and' as const,
                            auto_generate_synonyms_phrase_query: false,
                          },
                        },
                      },
                    ],
                  },
                };
              }),
            ],
          },
        },
        size: 1000,
        includeChanges,
        samplingProbability: 1,
        useMlStandardTokenizer: true,
        metadata,
      });

      return [...patternsToExclude, ...rareMessagePatterns];
    })
  );

  return uniqBy(
    orderBy(allPatterns.flat(), (pattern) => pattern.count, 'desc'),
    (pattern) => pattern.sample
  );
}
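
A hedged usage sketch of the task (index, field, and kuery values are illustrative, and an ObservabilityElasticsearchClient is assumed to already exist):

const patterns = await getLogPatterns({
  esClient,
  index: ['logs-*'],
  start: Date.now() - 15 * 60 * 1000,
  end: Date.now(),
  kuery: 'service.name:"cart-service"',
  fields: ['message'],
  includeChanges: true,
});
// Each result carries the pattern, a sample document, first/last occurrence, and,
// with includeChanges, a timeseries plus change-point metadata for the pattern's rate.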

@@ -24,6 +24,8 @@
    "@kbn/alerting-plugin",
    "@kbn/rule-registry-plugin",
    "@kbn/rule-data-utils",
    "@kbn/aiops-log-pattern-analysis",
    "@kbn/calculate-auto",
    "@kbn/utility-types",
    "@kbn/task-manager-plugin",
  ]

@@ -13,6 +13,9 @@ export {
  type AssistantMessage,
  type ToolMessage,
  type UserMessage,
  type MessageOf,
  type AssistantMessageOf,
  type ToolMessageOf,
  type ToolSchemaType,
  type FromToolSchema,
  type ToolSchema,

@@ -33,6 +33,9 @@ export {
  type AssistantMessage,
  type UserMessage,
  type ToolMessage,
  type AssistantMessageOf,
  type MessageOf,
  type ToolMessageOf,
} from './messages';
export { type ToolSchema, type ToolSchemaType, type FromToolSchema } from './tool_schema';
export {

@@ -5,7 +5,7 @@
 * 2.0.
 */

import type { ToolCall } from './tools';
import type { ToolCall, ToolCallsOf, ToolNamesOf, ToolOptions, ToolResponsesOf } from './tools';

/**
 * Enum for all possible {@link Message} roles.

@@ -52,17 +52,32 @@ export type AssistantMessage = MessageBase<MessageRole.Assistant> & {
/**
 * Represents a tool invocation result, following a request from the LLM to execute a tool.
 */
export type ToolMessage<TToolResponse extends Record<string, any> | unknown> =
  MessageBase<MessageRole.Tool> & {
    /**
     * The call id matching the {@link ToolCall} this tool message is for.
     */
    toolCallId: string;
    /**
     * The response from the tool invocation.
     */
    response: TToolResponse;
  };
export type ToolMessage<
  TName extends string = string,
  TToolResponse extends Record<string, any> | unknown = Record<string, any> | unknown,
  TToolData extends Record<string, any> | undefined = Record<string, any> | undefined
> = MessageBase<MessageRole.Tool> & {
  /*
   * The name of the tool called. Used for refining the type of the response.
   */
  name: TName;
  /**
   * The call id matching the {@link ToolCall} this tool message is for.
   */
  toolCallId: string;
  /**
   * The response from the tool invocation.
   */
  response: TToolResponse;
} & (TToolData extends undefined
    ? {}
    : {
        /**
         * Additional data from the tool invocation, that is not sent to the LLM
         * but can be used to attach baggage (such as timeseries or debug data)
         */
        data: TToolData;
      });

/**
 * Mixin composed of all the possible types of messages in a chatComplete discussion.

@@ -72,4 +87,30 @@ export type ToolMessage<TToolResponse extends Record<string, any> | unknown> =
 * - {@link AssistantMessage}
 * - {@link ToolMessage}
 */
export type Message = UserMessage | AssistantMessage | ToolMessage<unknown>;
export type Message = UserMessage | AssistantMessage | ToolMessage;

/**
 * Utility type to get the Assistant message type of a {@link ToolOptions} type.
 */
export type AssistantMessageOf<TToolOptions extends ToolOptions> = Omit<
  AssistantMessage,
  'toolCalls'
> &
  ToolCallsOf<TToolOptions>;

/**
 * Utility type to get the Tool message type of a {@link ToolOptions} type.
 */

export type ToolMessageOf<TToolOptions extends ToolOptions> = ToolMessage<
  ToolNamesOf<TToolOptions>,
  ToolResponsesOf<TToolOptions['tools']>
>;

/**
 * Utility type to get the mixin Message type of a {@link ToolOptions} type.
 */
export type MessageOf<TToolOptions extends ToolOptions> =
  | UserMessage
  | AssistantMessageOf<TToolOptions>
  | ToolMessageOf<TToolOptions>;
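
A sketch of what the widened ToolMessage type admits (the tool name and payload shapes are hypothetical):

import { MessageRole, ToolMessage } from '@kbn/inference-common';

type ObserveToolMessage = ToolMessage<
  'observe',
  { summary: string },
  { timeseries: Array<{ x: number; y: number }> }
>;

const message: ObserveToolMessage = {
  role: MessageRole.Tool,
  name: 'observe', // `name` narrows the type of `response`
  toolCallId: 'call-1',
  response: { summary: 'Error rate doubled at 10:02' },
  // `data` travels with the message but is never sent to the LLM
  data: { timeseries: [{ x: 1728900000000, y: 2 }] },
};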

@@ -8,24 +8,24 @@
import type { ValuesType } from 'utility-types';
import { FromToolSchema, ToolSchema } from './tool_schema';

type Assert<TValue, TType> = TValue extends TType ? TValue & TType : never;

type ToolsOfChoice<TToolOptions extends ToolOptions> = TToolOptions['toolChoice'] extends {
  function: infer TToolName;
}
  ? TToolName extends keyof TToolOptions['tools']
    ? Pick<TToolOptions['tools'], TToolName>
  ? TToolName extends string
    ? Pick<TToolOptions['tools'], TToolName>
    : TToolOptions['tools']
    : TToolOptions['tools']
  : TToolOptions['tools'];

/**
 * Utility type to infer the tool calls response shape.
 */
type ToolResponsesOf<TTools extends Record<string, ToolDefinition> | undefined> =
export type ToolResponsesOf<TTools extends Record<string, ToolDefinition> | undefined> =
  TTools extends Record<string, ToolDefinition>
    ? Array<
        ValuesType<{
          [TName in keyof TTools]: ToolResponseOf<Assert<TName, string>, TTools[TName]>;
          [TName in keyof TTools & string]: ToolCall<TName, ToolResponseOf<TTools[TName]>>;
        }>
      >
    : never[];

@@ -33,10 +33,11 @@ type ToolResponsesOf<TTools extends Record<string, ToolDefinition> | undefined>
/**
 * Utility type to infer the tool call response shape.
 */
type ToolResponseOf<TName extends string, TToolDefinition extends ToolDefinition> = ToolCall<
  TName,
  TToolDefinition extends { schema: ToolSchema } ? FromToolSchema<TToolDefinition['schema']> : {}
>;
export type ToolResponseOf<TToolDefinition extends ToolDefinition> = TToolDefinition extends {
  schema: ToolSchema;
}
  ? FromToolSchema<TToolDefinition['schema']>
  : {};

/**
 * Tool invocation choice type.

@@ -129,6 +130,10 @@ export interface ToolCall<
  name: TName;
} & (TArguments extends Record<string, any> ? { arguments: TArguments } : {});
}
/**
 * Utility type to get the tool names of ToolOptions
 */
export type ToolNamesOf<TToolOptions extends ToolOptions> = keyof TToolOptions['tools'] & string;

/**
 * Tool-related parameters of {@link ChatCompleteAPI}

@@ -96,6 +96,17 @@ export interface OutputOptions<
   * Defaults to false.
   */
  stream?: TStream;

  /**
   * Optional configuration for retrying the call if an error occurs.
   */
  retry?: {
    /**
     * Whether to retry on validation errors. Can be a number of retries,
     * or a boolean, which means one retry.
     */
    onValidationError?: boolean | number;
  };
}
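
A sketch of the new retry option in use, following the calling convention the tests in this PR exercise (connector id and schema values are illustrative):

const result = await output({
  id: 'extract_service_name',
  connectorId: '.gen-ai-connector',
  input: 'The checkout service is failing',
  schema: {
    type: 'object',
    properties: { serviceName: { type: 'string' } },
    required: ['serviceName'],
  } as const,
  retry: {
    // Retry up to two times when the LLM returns tool arguments that fail validation.
    onValidationError: 2,
  },
});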

/**

@@ -3,4 +3,4 @@
  "private": true,
  "version": "1.0.0",
  "license": "Elastic License 2.0"
}
}

@@ -0,0 +1,72 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import type { AggregationsCustomCategorizeTextAnalyzer } from '@elastic/elasticsearch/lib/api/types';

// This is a copy of the default categorization analyzer but using the 'standard' tokenizer rather than the 'ml_standard' tokenizer.
// The 'ml_standard' tokenizer splits tokens in a way that was observed to give better categories in testing many years ago, however,
// the downside of these better categories is then a potential failure to match the original documents when creating a filter for Discover.
// A future enhancement would be to check which analyzer is specified in the mappings for the source field and to use
// that instead of unconditionally using 'standard'.
// However for an initial fix, using the standard analyzer will be more likely to match the results from the majority of searches.
export const categorizationAnalyzer: AggregationsCustomCategorizeTextAnalyzer = {
  char_filter: ['first_line_with_letters'],
  tokenizer: 'standard',
  filter: [
    // @ts-expect-error filter type in AggregationsCustomCategorizeTextAnalyzer is incorrect
    {
      type: 'stop',
      stopwords: [
        'Monday',
        'Tuesday',
        'Wednesday',
        'Thursday',
        'Friday',
        'Saturday',
        'Sunday',
        'Mon',
        'Tue',
        'Wed',
        'Thu',
        'Fri',
        'Sat',
        'Sun',
        'January',
        'February',
        'March',
        'April',
        'May',
        'June',
        'July',
        'August',
        'September',
        'October',
        'November',
        'December',
        'Jan',
        'Feb',
        'Mar',
        'Apr',
        'May',
        'Jun',
        'Jul',
        'Aug',
        'Sep',
        'Oct',
        'Nov',
        'Dec',
        'GMT',
        'UTC',
      ],
    },
    // @ts-expect-error filter type in AggregationsCustomCategorizeTextAnalyzer is incorrect
    {
      type: 'limit',
      max_token_count: '100',
    },
  ],
};
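
A rough sketch of plugging the shared analyzer into a categorize_text aggregation with the Elasticsearch JS client (index and field names are illustrative):

const response = await client.search({
  index: 'logs-*',
  size: 0,
  aggs: {
    categories: {
      categorize_text: {
        field: 'message',
        categorization_analyzer: categorizationAnalyzer,
      },
    },
  },
});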

@@ -5,16 +5,14 @@
 * 2.0.
 */

import type {
  QueryDslQueryContainer,
  AggregationsCustomCategorizeTextAnalyzer,
} from '@elastic/elasticsearch/lib/api/types';
import type { QueryDslQueryContainer } from '@elastic/elasticsearch/lib/api/types';
import type { MappingRuntimeFields } from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import { isPopulatedObject } from '@kbn/ml-is-populated-object/src/is_populated_object';

import type { createRandomSamplerWrapper } from '@kbn/ml-random-sampler-utils';

import { createDefaultQuery } from '@kbn/aiops-common/create_default_query';
import { categorizationAnalyzer } from './categorization_analyzer';

const CATEGORY_LIMIT = 1000;
const EXAMPLE_LIMIT = 4;

@@ -121,67 +119,3 @@ export function createCategoryRequest(
    },
  };
}

// This is a copy of the default categorization analyzer but using the 'standard' tokenizer rather than the 'ml_standard' tokenizer.
// The 'ml_standard' tokenizer splits tokens in a way that was observed to give better categories in testing many years ago, however,
// the downside of these better categories is then a potential failure to match the original documents when creating a filter for Discover.
// A future enhancement would be to check which analyzer is specified in the mappings for the source field and to use
// that instead of unconditionally using 'standard'.
// However for an initial fix, using the standard analyzer will be more likely to match the results from the majority of searches.
const categorizationAnalyzer: AggregationsCustomCategorizeTextAnalyzer = {
  char_filter: ['first_line_with_letters'],
  tokenizer: 'standard',
  filter: [
    // @ts-expect-error filter type in AggregationsCustomCategorizeTextAnalyzer is incorrect
    {
      type: 'stop',
      stopwords: [
        'Monday',
        'Tuesday',
        'Wednesday',
        'Thursday',
        'Friday',
        'Saturday',
        'Sunday',
        'Mon',
        'Tue',
        'Wed',
        'Thu',
        'Fri',
        'Sat',
        'Sun',
        'January',
        'February',
        'March',
        'April',
        'May',
        'June',
        'July',
        'August',
        'September',
        'October',
        'November',
        'December',
        'Jan',
        'Feb',
        'Mar',
        'Apr',
        'May',
        'Jun',
        'Jul',
        'Aug',
        'Sep',
        'Oct',
        'Nov',
        'Dec',
        'GMT',
        'UTC',
      ],
    },
    // @ts-expect-error filter type in AggregationsCustomCategorizeTextAnalyzer is incorrect
    {
      type: 'limit',
      max_token_count: '100',
    },
  ],
};

@@ -26099,7 +26099,6 @@
  "xpack.investigateApp.addObservationUI.h2.addAnObservationChartLabel": "Ajouter un graphique d'observation",
  "xpack.investigateApp.addObservationUI.p.selectADataSourceLabel": "Sélectionnez une source de données pour générer un graphique d'aperçu",
  "xpack.investigateApp.appTitle": "Investigations",
  "xpack.investigateApp.assistantHypothesis.observabilityAIAssistantContextualInsight.helpMeInvestigateThisLabel": "Aidez-moi à résoudre la cause de cet échec",
  "xpack.investigateApp.defaultChart.error_equation.description": "Vérifiez l'équation.",
  "xpack.investigateApp.defaultChart.error_equation.title": "Une erreur s'est produite lors de l'affichage du graphique",
  "xpack.investigateApp.defaultChart.noData.title": "Aucune donnée graphique disponible",

@@ -25957,7 +25957,6 @@
  "xpack.investigateApp.addObservationUI.h2.addAnObservationChartLabel": "観測グラフを追加",
  "xpack.investigateApp.addObservationUI.p.selectADataSourceLabel": "データソースを選択して、プレビューグラフを生成",
  "xpack.investigateApp.appTitle": "調査",
  "xpack.investigateApp.assistantHypothesis.observabilityAIAssistantContextualInsight.helpMeInvestigateThisLabel": "このエラーの調査を支援",
  "xpack.investigateApp.defaultChart.error_equation.description": "式を確認してください。",
  "xpack.investigateApp.defaultChart.error_equation.title": "グラフの表示中にエラーが発生しました",
  "xpack.investigateApp.defaultChart.noData.title": "グラフデータがありません",

@@ -26040,7 +26040,6 @@
  "xpack.investigateApp.addObservationUI.h2.addAnObservationChartLabel": "添加观察图表",
  "xpack.investigateApp.addObservationUI.p.selectADataSourceLabel": "选择数据源以生成预览图表",
  "xpack.investigateApp.appTitle": "调查",
  "xpack.investigateApp.assistantHypothesis.observabilityAIAssistantContextualInsight.helpMeInvestigateThisLabel": "帮助我调查此故障",
  "xpack.investigateApp.defaultChart.error_equation.description": "检查方程。",
  "xpack.investigateApp.defaultChart.error_equation.title": "渲染图表时出错",
  "xpack.investigateApp.defaultChart.noData.title": "没有可用图表数据",
@@ -12,6 +12,7 @@ import {
  ChatCompletionEventType,
} from '@kbn/inference-common';
import { createOutputApi } from './create_output_api';
import { createToolValidationError } from '../../server/chat_complete/errors';

describe('createOutputApi', () => {
  let chatComplete: jest.Mock;

@@ -119,4 +120,80 @@ describe('createOutputApi', () => {
      },
    ]);
  });

  describe('when using retry', () => {
    const unvalidatedFailedToolCall = {
      function: {
        name: 'myFunction',
        arguments: JSON.stringify({ foo: 'bar' }),
      },
      toolCallId: 'foo',
    };

    const validationError = createToolValidationError('Validation failed', {
      toolCalls: [unvalidatedFailedToolCall],
    });

    it('retries once when onValidationError is a boolean', async () => {
      chatComplete.mockRejectedValueOnce(validationError);
      chatComplete.mockResolvedValueOnce(
        Promise.resolve({ content: 'retried content', toolCalls: [unvalidatedFailedToolCall] })
      );

      const output = createOutputApi(chatComplete);

      const response = await output({
        id: 'retry-id',
        stream: false,
        connectorId: '.retry-connector',
        input: 'input message',
        retry: {
          onValidationError: true,
        },
      });

      expect(chatComplete).toHaveBeenCalledTimes(2);
      expect(response).toEqual({
        id: 'retry-id',
        content: 'retried content',
        output: unvalidatedFailedToolCall.function.arguments,
      });
    });

    it('retries the number of specified attempts', async () => {
      chatComplete.mockRejectedValue(validationError);

      const output = createOutputApi(chatComplete);

      await expect(
        output({
          id: 'retry-id',
          stream: false,
          connectorId: '.retry-connector',
          input: 'input message',
          retry: {
            onValidationError: 2,
          },
        })
      ).rejects.toThrow('Validation failed');

      expect(chatComplete).toHaveBeenCalledTimes(3);
    });

    it('throws an error if retry is provided in streaming mode', () => {
      const output = createOutputApi(chatComplete);

      expect(() =>
        output({
          id: 'stream-retry-id',
          stream: true,
          connectorId: '.stream-retry-connector',
          input: 'input message',
          retry: {
            onValidationError: 1,
          },
        })
      ).toThrowError('Retry options are not supported in streaming mode');
    });
  });
});

@@ -10,17 +10,22 @@ import {
  ChatCompletionEventType,
  MessageRole,
  OutputAPI,
  OutputCompositeResponse,
  OutputEventType,
  OutputOptions,
  ToolSchema,
  isToolValidationError,
  withoutTokenCountEvents,
} from '@kbn/inference-common';
import { isObservable, map } from 'rxjs';
import { ensureMultiTurn } from '../utils/ensure_multi_turn';

type DefaultOutputOptions = OutputOptions<string, ToolSchema | undefined, boolean>;

export function createOutputApi(chatCompleteApi: ChatCompleteAPI): OutputAPI;

export function createOutputApi(chatCompleteApi: ChatCompleteAPI) {
  return ({
  return function callOutputApi({
    id,
    connectorId,
    input,

@@ -29,19 +34,26 @@ export function createOutputApi(chatCompleteApi: ChatCompleteAPI) {
    previousMessages,
    functionCalling,
    stream,
  }: OutputOptions<string, ToolSchema | undefined, boolean>) => {
    retry,
  }: DefaultOutputOptions): OutputCompositeResponse<string, ToolSchema | undefined, boolean> {
    if (stream && retry !== undefined) {
      throw new Error(`Retry options are not supported in streaming mode`);
    }

    const messages = ensureMultiTurn([
      ...(previousMessages || []),
      {
        role: MessageRole.User,
        content: input,
      },
    ]);

    const response = chatCompleteApi({
      connectorId,
      stream,
      functionCalling,
      system,
      messages: ensureMultiTurn([
        ...(previousMessages || []),
        {
          role: MessageRole.User,
          content: input,
        },
      ]),
      messages,
      ...(schema
        ? {
            tools: {

@@ -79,16 +91,55 @@ export function createOutputApi(chatCompleteApi: ChatCompleteAPI) {
        })
      );
    } else {
      return response.then((chatResponse) => {
        return {
          id,
          content: chatResponse.content,
          output:
            chatResponse.toolCalls.length && 'arguments' in chatResponse.toolCalls[0].function
              ? chatResponse.toolCalls[0].function.arguments
              : undefined,
        };
      });
      return response.then(
        (chatResponse) => {
          return {
            id,
            content: chatResponse.content,
            output:
              chatResponse.toolCalls.length && 'arguments' in chatResponse.toolCalls[0].function
                ? chatResponse.toolCalls[0].function.arguments
                : undefined,
          };
        },
        (error: Error) => {
          if (isToolValidationError(error) && retry?.onValidationError) {
            const retriesLeft =
              typeof retry.onValidationError === 'number' ? retry.onValidationError : 1;

            return callOutputApi({
              id,
              connectorId,
              input,
              schema,
              system,
              previousMessages: messages.concat(
                {
                  role: MessageRole.Assistant as const,
                  content: '',
                  toolCalls: error.meta.toolCalls!,
                },
                ...(error.meta.toolCalls?.map((toolCall) => {
                  return {
                    name: toolCall.function.name,
                    role: MessageRole.Tool as const,
                    toolCallId: toolCall.toolCallId,
                    response: {
                      error: error.meta,
                    },
                  };
                }) ?? [])
              ),
              functionCalling,
              stream: false,
              retry: {
                onValidationError: retriesLeft - 1,
              },
            }) as OutputCompositeResponse<string, ToolSchema | undefined, false>;
          }
          throw error;
        }
      );
    }
  };
}
@@ -170,6 +170,7 @@ describe('bedrockClaudeAdapter', () => {
        ],
      },
      {
        name: 'my_function',
        role: MessageRole.Tool,
        toolCallId: '0',
        response: {

@@ -172,6 +172,7 @@ describe('geminiAdapter', () => {
        ],
      },
      {
        name: 'my_function',
        role: MessageRole.Tool,
        toolCallId: '0',
        response: {

@@ -142,6 +142,7 @@ describe('openAIAdapter', () => {
        ],
      },
      {
        name: 'my_function',
        role: MessageRole.Tool,
        toolCallId: '0',
        response: {

@@ -58,7 +58,6 @@ export const openAIAdapter: InferenceConnectorAdapter = {
      request = {
        stream,
        messages: messagesToOpenAI({ system: wrapped.system, messages: wrapped.messages }),
        temperature: 0,
      };
    } else {
      request = {

@@ -66,7 +65,6 @@ export const openAIAdapter: InferenceConnectorAdapter = {
        messages: messagesToOpenAI({ system, messages }),
        tool_choice: toolChoiceToOpenAI(toolChoice),
        tools: toolsToOpenAI(tools),
        temperature: 0,
      };
    }

@@ -5,7 +5,7 @@
 * 2.0.
 */

import { last } from 'lodash';
import { last, omit } from 'lodash';
import { defer, switchMap, throwError } from 'rxjs';
import type { Logger } from '@kbn/logging';
import type { KibanaRequest } from '@kbn/core-http-server';

@@ -51,14 +51,26 @@ export function createChatCompleteApi({ request, actions, logger }: CreateChatCo
    const connectorType = connector.type;
    const inferenceAdapter = getInferenceAdapter(connectorType);

    const messagesWithoutData = messages.map((message) => omit(message, 'data'));

    if (!inferenceAdapter) {
      return throwError(() =>
        createInferenceRequestError(`Adapter for type ${connectorType} not implemented`, 400)
      );
    }

    logger.debug(() => `Sending request: ${JSON.stringify(last(messages))}`);
    logger.trace(() => JSON.stringify({ messages, toolChoice, tools, system }));
    logger.debug(
      () => `Sending request, last message is: ${JSON.stringify(last(messagesWithoutData))}`
    );

    logger.trace(() =>
      JSON.stringify({
        messages: messagesWithoutData,
        toolChoice,
        tools,
        system,
      })
    );

    return inferenceAdapter.chatComplete({
      system,

@@ -44,7 +44,7 @@ export function createToolValidationError(
    name?: string;
    arguments?: string;
    errorsText?: string;
    toolCalls?: UnvalidatedToolCall[];
    toolCalls: UnvalidatedToolCall[];
  }
): ChatCompletionToolValidationError {
  return new InferenceTaskError(ChatCompletionErrorCode.ToolValidationError, message, meta);

@@ -79,7 +79,7 @@ export function wrapWithSimulatedFunctionCalling({
  };
}

const convertToolResponseMessage = (message: ToolMessage<unknown>): UserMessage => {
const convertToolResponseMessage = (message: ToolMessage): UserMessage => {
  return {
    role: MessageRole.User,
    content: JSON.stringify({

@@ -183,7 +183,7 @@ describe('chunksIntoMessage', () => {
    }

    await expect(async () => getMessage()).rejects.toThrowErrorMatchingInlineSnapshot(
      `"Tool call arguments for myFunction were invalid"`
      `"Tool call arguments for myFunction (001) were invalid"`
    );
  });

@@ -5,17 +5,17 @@
 * 2.0.
 */

import { last, map, merge, OperatorFunction, scan, share } from 'rxjs';
import type { Logger } from '@kbn/logging';
import {
  UnvalidatedToolCall,
  ToolOptions,
  ChatCompletionChunkEvent,
  ChatCompletionEventType,
  ChatCompletionMessageEvent,
  ChatCompletionTokenCountEvent,
  ToolOptions,
  UnvalidatedToolCall,
  withoutTokenCountEvents,
} from '@kbn/inference-common';
import type { Logger } from '@kbn/logging';
import { OperatorFunction, map, merge, share, toArray } from 'rxjs';
import { validateToolCalls } from '../../util/validate_tool_calls';

export function chunksIntoMessage<TToolOptions extends ToolOptions>({

@@ -37,38 +37,36 @@ export function chunksIntoMessage<TToolOptions extends ToolOptions>({
    shared$,
    shared$.pipe(
      withoutTokenCountEvents(),
      scan(
        (prev, chunk) => {
          prev.content += chunk.content ?? '';
      toArray(),
      map((chunks): ChatCompletionMessageEvent<TToolOptions> => {
        const concatenatedChunk = chunks.reduce(
          (prev, chunk) => {
            prev.content += chunk.content ?? '';

          chunk.tool_calls?.forEach((toolCall) => {
            let prevToolCall = prev.tool_calls[toolCall.index];
            if (!prevToolCall) {
              prev.tool_calls[toolCall.index] = {
                function: {
                  name: '',
                  arguments: '',
                },
                toolCallId: '',
              };
            chunk.tool_calls?.forEach((toolCall) => {
              let prevToolCall = prev.tool_calls[toolCall.index];
              if (!prevToolCall) {
                prev.tool_calls[toolCall.index] = {
                  function: {
                    name: '',
                    arguments: '',
                  },
                  toolCallId: '',
                };

              prevToolCall = prev.tool_calls[toolCall.index];
            }
                prevToolCall = prev.tool_calls[toolCall.index];
              }

            prevToolCall.function.name += toolCall.function.name;
            prevToolCall.function.arguments += toolCall.function.arguments;
            prevToolCall.toolCallId += toolCall.toolCallId;
          });
              prevToolCall.function.name += toolCall.function.name;
              prevToolCall.function.arguments += toolCall.function.arguments;
              prevToolCall.toolCallId += toolCall.toolCallId;
            });

          return prev;
        },
        { content: '', tool_calls: [] as UnvalidatedToolCall[] }
      );

            return prev;
          },
          {
            content: '',
            tool_calls: [] as UnvalidatedToolCall[],
          }
        ),
      last(),
      map((concatenatedChunk): ChatCompletionMessageEvent<TToolOptions> => {
        logger.debug(() => `Received completed message: ${JSON.stringify(concatenatedChunk)}`);

        const validatedToolCalls = validateToolCalls<TToolOptions>({
@@ -76,9 +76,11 @@ const chatCompleteBodySchema: Type<ChatCompleteRequestBody> = schema.object({
      name: schema.maybe(schema.string()),
    }),
    schema.object({
      name: schema.string(),
      role: schema.literal(MessageRole.Tool),
      toolCallId: schema.string(),
      response: schema.recordOf(schema.string(), schema.any()),
      data: schema.maybe(schema.recordOf(schema.string(), schema.any())),
    }),
  ])
),

@@ -34,6 +34,7 @@ export const generateEsqlTask = <TToolOptions extends ToolOptions>({
  docBase,
  functionCalling,
  logger,
  system,
}: {
  connectorId: string;
  systemMessage: string;

@@ -43,6 +44,7 @@ export const generateEsqlTask = <TToolOptions extends ToolOptions>({
  docBase: EsqlDocumentBase;
  functionCalling?: FunctionCallingMode;
  logger: Pick<Logger, 'debug'>;
  system?: string;
}) => {
  return function askLlmToRespond({
    documentationRequest: { commands, functions },

@@ -97,7 +99,7 @@ export const generateEsqlTask = <TToolOptions extends ToolOptions>({

      When converting queries from one language to ES|QL, make sure that the functions are available
      and documented in ES|QL. E.g., for SPL's LEN, use LENGTH. For IF, use CASE.
      `,
      ${system ? `## Additional instructions\n\n${system}` : ''}`,
      messages: [
        ...messages,
        {

@@ -106,6 +108,7 @@ export const generateEsqlTask = <TToolOptions extends ToolOptions>({
          toolCalls: [fakeRequestDocsToolCall],
        },
        {
          name: fakeRequestDocsToolCall.function.name,
          role: MessageRole.Tool,
          response: {
            documentation: requestedDocumentation,

@@ -21,6 +21,7 @@ export function naturalLanguageToEsql<TToolOptions extends ToolOptions>({
  toolChoice,
  logger,
  functionCalling,
  system,
  ...rest
}: NlToEsqlTaskParams<TToolOptions>): Observable<NlToEsqlTaskEvent<TToolOptions>> {
  return from(loadDocBase()).pipe(

@@ -41,6 +42,7 @@ export function naturalLanguageToEsql<TToolOptions extends ToolOptions>({
          tools,
          toolChoice,
        },
        system,
      });

      return requestDocumentation({

@@ -29,5 +29,6 @@ export type NlToEsqlTaskParams<TToolOptions extends ToolOptions> = {
  connectorId: string;
  logger: Pick<Logger, 'debug'>;
  functionCalling?: FunctionCallingMode;
  system?: string;
} & TToolOptions &
  ({ input: string } | { messages: Message[] });

@@ -108,7 +108,7 @@ describe('validateToolCalls', () => {
      });
    }
    expect(() => validate()).toThrowErrorMatchingInlineSnapshot(
      `"Tool call arguments for my_function were invalid"`
      `"Tool call arguments for my_function (1) were invalid"`
    );

    try {

@@ -119,6 +119,15 @@ describe('validateToolCalls', () => {
        arguments: JSON.stringify({ foo: 'bar' }),
        errorsText: `data must have required property 'bar'`,
        name: 'my_function',
        toolCalls: [
          {
            function: {
              arguments: JSON.stringify({ foo: 'bar' }),
              name: 'my_function',
            },
            toolCallId: '1',
          },
        ],
      });
    } else {
      fail('Expected toolValidationError');

@@ -54,11 +54,12 @@ export function validateToolCalls<TToolOptions extends ToolOptions>({

    if (!valid) {
      throw createToolValidationError(
        `Tool call arguments for ${toolCall.function.name} were invalid`,
        `Tool call arguments for ${toolCall.function.name} (${toolCall.toolCallId}) were invalid`,
        {
          name: toolCall.function.name,
          errorsText: validator.errorsText(),
          arguments: toolCall.function.arguments,
          toolCalls,
        }
      );
    }
@@ -51,6 +51,9 @@
    "@kbn/rule-data-utils",
    "@kbn/spaces-plugin",
    "@kbn/cloud-plugin",
    "@kbn/observability-utils-browser",
    "@kbn/observability-utils-server",
    "@kbn/observability-utils-common",
    "@kbn/storybook",
    "@kbn/dashboard-plugin",
    "@kbn/deeplinks-analytics",

@@ -17,6 +17,7 @@ import { SearchBar, IUnifiedSearchPluginServices } from '@kbn/unified-search-plu
import { KibanaContextProvider } from '@kbn/kibana-react-plugin/public';
import { merge } from 'lodash';
import { Storage } from '@kbn/kibana-utils-plugin/public';
import { of } from 'rxjs';
import type { EsqlQueryMeta } from '../public/services/esql';
import type { InvestigateAppServices } from '../public/services/types';
import { InvestigateAppKibanaContext } from '../public/hooks/use_kibana';

@@ -54,6 +55,10 @@ export function getMockInvestigateAppContext(): DeeplyMockedKeys<InvestigateAppK
      }),
    },
    charts: {} as any,
    investigateAppRepositoryClient: {
      fetch: jest.fn().mockImplementation(() => Promise.resolve()),
      stream: jest.fn().mockImplementation(() => of()) as any,
    },
  };

  const core = coreMock.createStart();

@@ -2,8 +2,8 @@
  "type": "plugin",
  "id": "@kbn/investigate-app-plugin",
  "owner": "@elastic/obs-ux-management-team",
  "group": "observability",
  "visibility": "private",
  "group": "observability",
  "plugin": {
    "id": "investigateApp",
    "server": true,

@@ -24,14 +24,22 @@
      "observability",
      "licensing",
      "ruleRegistry",
      "inference",
      "alerting",
      "spaces",
      "slo",
      "apmDataAccess",
      "usageCollection"
    ],
    "optionalPlugins": [
      "observabilityAIAssistant",
      "observabilityAIAssistantApp"
    ],
    "requiredBundles": [
      "esql",
      "kibanaReact",
      "kibanaUtils"
    ],
    "optionalPlugins": ["observabilityAIAssistant"],
    "extraPublicDirs": []
  }
}

@@ -11,7 +11,7 @@ import type {
  ReturnOf,
  RouteRepositoryClient,
} from '@kbn/server-route-repository';
import { formatRequest } from '@kbn/server-route-repository-utils/src/format_request';
import { createRepositoryClient } from '@kbn/server-route-repository-client';
import type { InvestigateAppServerRouteRepository } from '../../server';

type FetchOptions = Omit<HttpFetchOptions, 'body'> & {

@@ -25,15 +25,15 @@ export type InvestigateAppAPIClientOptions = Omit<
  signal: AbortSignal | null;
};

export type InvestigateAppAPIClient = RouteRepositoryClient<
export type InvestigateAppRepositoryClient = RouteRepositoryClient<
  InvestigateAppServerRouteRepository,
  InvestigateAppAPIClientOptions
>['fetch'];
>;

export type AutoAbortedInvestigateAppAPIClient = RouteRepositoryClient<
export type AutoAbortedInvestigateAppRepositoryClient = RouteRepositoryClient<
  InvestigateAppServerRouteRepository,
  Omit<InvestigateAppAPIClientOptions, 'signal'>
>['fetch'];
>;

export type InvestigateAppAPIEndpoint = keyof InvestigateAppServerRouteRepository;

@@ -45,19 +45,6 @@ export type APIReturnType<TEndpoint extends InvestigateAppAPIEndpoint> = ReturnO
export type InvestigateAppAPIClientRequestParamsOf<TEndpoint extends InvestigateAppAPIEndpoint> =
  ClientRequestParamsOf<InvestigateAppServerRouteRepository, TEndpoint>;

export function createCallInvestigateAppAPI(core: CoreStart | CoreSetup) {
  return ((endpoint, options) => {
    const { params } = options as unknown as {
      params?: Partial<Record<string, any>>;
    };

    const { method, pathname, version } = formatRequest(endpoint, params?.path);

    return core.http[method](pathname, {
      ...options,
      body: params && params.body ? JSON.stringify(params.body) : undefined,
      query: params?.query,
      version,
    });
  }) as InvestigateAppAPIClient;
export function createInvestigateAppRepositoryClient(core: CoreStart | CoreSetup) {
  return createRepositoryClient(core) as InvestigateAppRepositoryClient;
}
|

@@ -4,19 +4,22 @@
  * 2.0; you may not use this file except in compliance with the Elastic License
  * 2.0.
  */
-import dedent from 'dedent';
-import {
-  ALERT_RULE_PARAMETERS,
-  ALERT_START,
-  ALERT_RULE_CATEGORY,
-  ALERT_REASON,
-} from '@kbn/rule-data-utils';
 import { i18n } from '@kbn/i18n';
-import { EntityWithSource } from '@kbn/investigation-shared';
-import React, { useCallback } from 'react';
+import type { RootCauseAnalysisEvent } from '@kbn/observability-ai-server/root_cause_analysis';
+import { EcsFieldsResponse } from '@kbn/rule-registry-plugin/common';
+import React, { useState, useRef, useEffect } from 'react';
+import { omit } from 'lodash';
+import {
+  ALERT_FLAPPING_HISTORY,
+  ALERT_RULE_EXECUTION_TIMESTAMP,
+  ALERT_RULE_EXECUTION_UUID,
+  EVENT_ACTION,
+  EVENT_KIND,
+} from '@kbn/rule-registry-plugin/common/technical_rule_data_field_names';
+import { isRequestAbortedError } from '@kbn/server-route-repository-client';
 import { useKibana } from '../../../../hooks/use_kibana';
 import { useInvestigation } from '../../contexts/investigation_context';
-import { useFetchEntities } from '../../../../hooks/use_fetch_entities';
+import { useUpdateInvestigation } from '../../../../hooks/use_update_investigation';
 
 export interface InvestigationContextualInsight {
   key: string;
@@ -25,98 +28,177 @@ export interface InvestigationContextualInsight {
 }
 
 export function AssistantHypothesis({ investigationId }: { investigationId: string }) {
-  const { alert } = useInvestigation();
+  const {
+    alert,
+    globalParams: { timeRange },
+    investigation,
+  } = useInvestigation();
   const {
+    core: { notifications },
+    services: { investigateAppRepositoryClient },
     dependencies: {
       start: {
-        observabilityAIAssistant: {
-          ObservabilityAIAssistantContextualInsight,
-          getContextualInsightMessages,
-        },
+        observabilityAIAssistant: { useGenAIConnectors },
+        observabilityAIAssistantApp: { RootCauseAnalysisContainer },
       },
     },
   } = useKibana();
-  const { data: entitiesData } = useFetchEntities({
-    investigationId,
-    serviceName: alert?.['service.name'] ? `${alert?.['service.name']}` : undefined,
-    serviceEnvironment: alert?.['service.environment']
-      ? `${alert?.['service.environment']}`
-      : undefined,
-    hostName: alert?.['host.name'] ? `${alert?.['host.name']}` : undefined,
-    containerId: alert?.['container.id'] ? `${alert?.['container.id']}` : undefined,
-  });
-
-  const getAlertContextMessages = useCallback(async () => {
-    if (!getContextualInsightMessages || !alert) {
-      return [];
+  const { mutateAsync: updateInvestigation } = useUpdateInvestigation();
+
+  const { loading: loadingConnector, selectedConnector } = useGenAIConnectors();
+
+  const serviceName = alert?.['service.name'] as string | undefined;
+
+  const [events, setEvents] = useState<RootCauseAnalysisEvent[]>([]);
+  const [loading, setLoading] = useState(false);
+  const [error, setError] = useState<Error | undefined>(undefined);
+
+  const controllerRef = useRef(new AbortController());
+
+  useEffect(() => {
+    if (investigation?.rootCauseAnalysis) {
+      setEvents(investigation.rootCauseAnalysis.events);
     }
+  }, [investigation?.rootCauseAnalysis]);
 
-    const entities = entitiesData?.entities ?? [];
+  const [completeInBackground, setCompleteInBackground] = useState(true);
 
-    const entityContext = entities?.length
-      ? `
-    Alerts can optionally be associated with entities. Entities can be services, hosts, containers, or other resources. Entities can have metrics associated with them.
-
-    The alert that triggered this investigation is associated with the following entities: ${entities
-      .map((entity, index) => {
-        return dedent(`
-          ## Entity ${index + 1}:
-          ${formatEntityMetrics(entity)};
-        `);
-      })
-      .join('/n/n')}`
-      : '';
+  const runRootCauseAnalysis = ({
+    alert: nonNullishAlert,
+    connectorId,
+    serviceName: nonNullishServiceName,
+  }: {
+    alert: EcsFieldsResponse;
+    connectorId: string;
+    serviceName: string;
+  }) => {
+    const rangeFrom = timeRange.from;
 
-    return getContextualInsightMessages({
-      message: `I am investigating a failure in my system. I was made aware of the failure by an alert and I am trying to understand the root cause of the issue.`,
-      instructions: dedent(
-        `I'm an SRE. I am investigating a failure in my system. I was made aware of the failure via an alert. Your current task is to help me identify the root cause of the failure in my system.
+    const rangeTo = timeRange.to;
 
-        The rule that triggered the alert is a ${
-          alert[ALERT_RULE_CATEGORY]
-        } rule. The alert started at ${alert[ALERT_START]}. The alert reason is ${
-          alert[ALERT_REASON]
-        }. The rule parameters are ${JSON.stringify(ALERT_RULE_PARAMETERS)}.
+    setLoading(true);
 
-        ${entityContext}
+    setError(undefined);
 
-        Based on the alert details, suggest a root cause and next steps to mitigate the issue.
-
-        I do not have the alert details or entity details in front of me, so be sure to repeat the alert reason (${
-          alert[ALERT_REASON]
-        }), when the alert was triggered (${
-          alert[ALERT_START]
-        }), and the entity metrics in your response.
+    setEvents([]);
 
-        When displaying the entity metrics, please convert the metrics to a human-readable format. For example, convert "logRate" to "Log Rate" and "errorRate" to "Error Rate".
-        `
-      ),
-    });
-  }, [alert, getContextualInsightMessages, entitiesData?.entities]);
+    investigateAppRepositoryClient
+      .stream('POST /internal/observability/investigation/root_cause_analysis', {
+        params: {
+          body: {
+            investigationId,
+            connectorId,
+            context: `The user is investigating an alert for the ${serviceName} service,
+      and wants to find the root cause. Here is the alert:
 
-  if (!ObservabilityAIAssistantContextualInsight) {
+      ${JSON.stringify(sanitizeAlert(nonNullishAlert))}`,
+            rangeFrom,
+            rangeTo,
+            serviceName: nonNullishServiceName,
+            completeInBackground,
+          },
+        },
+        signal: controllerRef.current.signal,
+      })
+      .subscribe({
+        next: (event) => {
+          setEvents((prev) => {
+            return prev.concat(event.event);
+          });
+        },
+        error: (nextError) => {
+          if (!isRequestAbortedError(nextError)) {
+            notifications.toasts.addError(nextError, {
+              title: i18n.translate(
+                'xpack.investigateApp.assistantHypothesis.failedToLoadAnalysis',
+                {
+                  defaultMessage: `Failed to load analysis`,
+                }
+              ),
+            });
+            setError(nextError);
+          } else {
+            setError(
+              new Error(
+                i18n.translate('xpack.investigateApp.assistantHypothesis.analysisAborted', {
+                  defaultMessage: `Analysis was aborted`,
+                })
+              )
+            );
+          }
+
+          setLoading(false);
+        },
+        complete: () => {
+          setLoading(false);
+        },
+      });
+  };
+
+  if (!serviceName) {
     return null;
   }
 
-  return alert && entitiesData ? (
-    <ObservabilityAIAssistantContextualInsight
-      title={i18n.translate(
-        'xpack.investigateApp.assistantHypothesis.observabilityAIAssistantContextualInsight.helpMeInvestigateThisLabel',
-        { defaultMessage: 'Help me investigate this failure' }
-      )}
-      messages={getAlertContextMessages}
+  return (
+    <RootCauseAnalysisContainer
+      events={events}
+      loading={loading || loadingConnector}
+      completeInBackground={completeInBackground}
+      onCompleteInBackgroundClick={() => {
+        setCompleteInBackground(() => !completeInBackground);
+      }}
+      onStopAnalysisClick={() => {
+        controllerRef.current.abort();
+        controllerRef.current = new AbortController();
+      }}
+      onClearAnalysisClick={() => {
+        setEvents([]);
+        if (investigation?.rootCauseAnalysis) {
+          updateInvestigation({
+            investigationId,
+            payload: {
+              rootCauseAnalysis: {
+                events: [],
+              },
+            },
+          });
+        }
+      }}
+      onResetAnalysisClick={() => {
+        controllerRef.current.abort();
+        controllerRef.current = new AbortController();
+        if (alert && selectedConnector && serviceName) {
+          runRootCauseAnalysis({
+            alert,
+            connectorId: selectedConnector,
+            serviceName,
+          });
+        }
+      }}
+      error={error}
+      onStartAnalysisClick={() => {
+        if (alert && selectedConnector && serviceName) {
+          runRootCauseAnalysis({
+            alert,
+            connectorId: selectedConnector,
+            serviceName,
+          });
+        }
+      }}
     />
-  ) : null;
+  );
 }
 
+function sanitizeAlert(alert: EcsFieldsResponse) {
+  return omit(
+    alert,
+    ALERT_RULE_EXECUTION_TIMESTAMP,
+    '_index',
+    ALERT_FLAPPING_HISTORY,
+    EVENT_ACTION,
+    EVENT_KIND,
+    ALERT_RULE_EXECUTION_UUID,
+    '@timestamp'
+  );
+}
-const formatEntityMetrics = (entity: EntityWithSource): string => {
-  const entityMetrics = Object.entries(entity.metrics)
-    .map(([key, value]) => `${key}: ${value}`)
-    .join(', ');
-  const entitySources = entity.sources.map((source) => source.dataStream).join(', ');
-  return dedent(`
-    Entity name: ${entity.display_name};
-    Entity type: ${entity.type};
-    Entity metrics: ${entityMetrics};
-    Entity data streams: ${entitySources}
-  `);
-};
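The new component boils down to a small streaming state machine: one `AbortController` per analysis run, and user-initiated aborts handled differently from real failures. A distilled sketch of that pattern, with names taken from the diff above:

```ts
// One AbortController per run; aborting cancels the in-flight stream, and a fresh
// controller is created so the next run starts with a clean signal.
let controller = new AbortController();

function startAnalysis(body: Record<string, unknown>) {
  investigateAppRepositoryClient
    .stream('POST /internal/observability/investigation/root_cause_analysis', {
      params: { body },
      signal: controller.signal,
    })
    .subscribe({
      next: (event) => {
        // each server-sent event carries one RootCauseAnalysisEvent in event.event
      },
      error: (err) => {
        // isRequestAbortedError(err) separates a user-initiated stop from a real failure
      },
    });
}

function stopAnalysis() {
  controller.abort();
  controller = new AbortController();
}
```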

@@ -27,6 +27,7 @@ import type {
   InvestigateAppSetupDependencies,
   InvestigateAppStartDependencies,
 } from './types';
+import { createInvestigateAppRepositoryClient, InvestigateAppRepositoryClient } from './api';
 
 const getCreateEsqlService = once(() => import('./services/esql').then((m) => m.createEsqlService));
 
@@ -41,6 +42,7 @@ export class InvestigateAppPlugin
 {
   logger: Logger;
   config: ConfigSchema;
+  repositoryClient!: InvestigateAppRepositoryClient;
 
   constructor(context: PluginInitializerContext<ConfigSchema>) {
     this.logger = context.logger.get();
@@ -51,6 +53,8 @@ export class InvestigateAppPlugin
     coreSetup: CoreSetup<InvestigateAppStartDependencies, InvestigateAppPublicStart>,
     pluginsSetup: InvestigateAppSetupDependencies
   ): InvestigateAppPublicSetup {
+    this.repositoryClient = createInvestigateAppRepositoryClient(coreSetup);
+
     coreSetup.application.register({
       id: INVESTIGATE_APP_ID,
       title: i18n.translate('xpack.investigateApp.appTitle', {
@@ -93,6 +97,7 @@ export class InvestigateAppPlugin
         lens: pluginsStart.lens,
       }),
       charts: pluginsStart.charts,
+      investigateAppRepositoryClient: this.repositoryClient,
     };
 
     ReactDOM.render(
@@ -127,6 +132,7 @@ export class InvestigateAppPlugin
           start: pluginsStart,
         },
         services: {
+          investigateAppRepositoryClient: this.repositoryClient,
          esql: createEsqlService({
            data: pluginsStart.data,
            dataViews: pluginsStart.dataViews,
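With the client built once in `setup()` and handed to React through `services`, page code reaches it from context rather than constructing its own. A sketch, assuming the app's `useKibana()` hook is typed with `InvestigateAppServices`:

```ts
// Inside any investigate_app React component:
const {
  services: { investigateAppRepositoryClient },
} = useKibana();
// investigateAppRepositoryClient.fetch(...) and .stream(...) are available here.
```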

@@ -7,8 +7,10 @@
 
 import { ChartsPluginStart } from '@kbn/charts-plugin/public';
 import type { EsqlService } from './esql';
+import type { InvestigateAppRepositoryClient } from '../api';
 
 export interface InvestigateAppServices {
   esql: EsqlService;
   charts: ChartsPluginStart;
+  investigateAppRepositoryClient: InvestigateAppRepositoryClient;
 }

@@ -8,6 +8,10 @@ import type {
   ObservabilityAIAssistantPublicSetup,
   ObservabilityAIAssistantPublicStart,
 } from '@kbn/observability-ai-assistant-plugin/public';
+import type {
+  ObservabilityAIAssistantAppPublicSetup,
+  ObservabilityAIAssistantAppPublicStart,
+} from '@kbn/observability-ai-assistant-app-plugin/public';
 import { ChartsPluginStart } from '@kbn/charts-plugin/public';
 import type { ContentManagementPublicStart } from '@kbn/content-management-plugin/public';
 import type { DataPublicPluginSetup, DataPublicPluginStart } from '@kbn/data-plugin/public';
@@ -43,6 +47,7 @@ export interface InvestigateAppSetupDependencies {
   investigate: InvestigatePublicSetup;
   observabilityShared: ObservabilitySharedPluginSetup;
   observabilityAIAssistant: ObservabilityAIAssistantPublicSetup;
+  observabilityAIAssistantApp: ObservabilityAIAssistantAppPublicSetup;
   lens: LensPublicSetup;
   dataViews: DataViewsPublicPluginSetup;
   data: DataPublicPluginSetup;
@@ -58,6 +63,7 @@ export interface InvestigateAppStartDependencies {
   investigate: InvestigatePublicStart;
   observabilityShared: ObservabilitySharedPluginStart;
   observabilityAIAssistant: ObservabilityAIAssistantPublicStart;
+  observabilityAIAssistantApp: ObservabilityAIAssistantAppPublicStart;
   lens: LensPublicStart;
   dataViews: DataViewsPublicPluginStart;
   data: DataPublicPluginStart;

@@ -15,18 +15,19 @@ import {
   findInvestigationsParamsSchema,
   getAllInvestigationStatsParamsSchema,
   getAllInvestigationTagsParamsSchema,
+  getEntitiesParamsSchema,
+  GetEntitiesResponse,
+  getEventsParamsSchema,
+  GetEventsResponse,
   getInvestigationItemsParamsSchema,
   getInvestigationNotesParamsSchema,
   getInvestigationParamsSchema,
   updateInvestigationItemParamsSchema,
   updateInvestigationNoteParamsSchema,
   updateInvestigationParamsSchema,
-  getEventsParamsSchema,
-  GetEventsResponse,
-  getEntitiesParamsSchema,
-  GetEntitiesResponse,
 } from '@kbn/investigation-shared';
 import { ScopedAnnotationsClient } from '@kbn/observability-plugin/server';
+import { createEntitiesESClient } from '../clients/create_entities_es_client';
 import { createInvestigation } from '../services/create_investigation';
 import { createInvestigationItem } from '../services/create_investigation_item';
 import { createInvestigationNote } from '../services/create_investigation_note';
@@ -34,20 +35,20 @@ import { deleteInvestigation } from '../services/delete_investigation';
 import { deleteInvestigationItem } from '../services/delete_investigation_item';
 import { deleteInvestigationNote } from '../services/delete_investigation_note';
 import { findInvestigations } from '../services/find_investigations';
+import { AlertsClient, getAlertsClient } from '../services/get_alerts_client';
+import { getAllInvestigationStats } from '../services/get_all_investigation_stats';
 import { getAllInvestigationTags } from '../services/get_all_investigation_tags';
+import { getEntitiesWithSource } from '../services/get_entities';
+import { getAlertEvents, getAnnotationEvents } from '../services/get_events';
 import { getInvestigation } from '../services/get_investigation';
 import { getInvestigationItems } from '../services/get_investigation_items';
 import { getInvestigationNotes } from '../services/get_investigation_notes';
 import { investigationRepositoryFactory } from '../services/investigation_repository';
 import { updateInvestigation } from '../services/update_investigation';
-import { getAlertEvents, getAnnotationEvents } from '../services/get_events';
-import { AlertsClient, getAlertsClient } from '../services/get_alerts_client';
 import { updateInvestigationItem } from '../services/update_investigation_item';
 import { updateInvestigationNote } from '../services/update_investigation_note';
 import { createInvestigateAppServerRoute } from './create_investigate_app_server_route';
-import { getAllInvestigationStats } from '../services/get_all_investigation_stats';
-import { getEntitiesWithSource } from '../services/get_entities';
-import { createEntitiesESClient } from '../clients/create_entities_es_client';
+import { rootCauseAnalysisRoute } from './rca/route';
 
 const createInvestigationRoute = createInvestigateAppServerRoute({
   endpoint: 'POST /api/observability/investigations 2023-10-31',
@@ -400,6 +401,7 @@ export function getGlobalInvestigateAppServerRouteRepository() {
     ...getEntitiesRoute,
     ...getAllInvestigationStatsRoute,
     ...getAllInvestigationTagsRoute,
+    ...rootCauseAnalysisRoute,
   };
 }
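Registering the new endpoint is just an object spread because each route module is keyed by its endpoint string. A toy sketch of why that composes (names reused from the diff; the merged shape is illustrative, not the plugin's actual repository):

```ts
// Each route module is an object like { '<METHOD> <path>': routeDefinition }, so
// spreading the modules yields one repository whose keys drive both server routing
// and the typed client.
const repository = {
  ...getAllInvestigationTagsRoute,
  ...rootCauseAnalysisRoute,
};

// keyof typeof repository now includes
// 'POST /internal/observability/investigation/root_cause_analysis'
type Endpoint = keyof typeof repository;
```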

@@ -0,0 +1,163 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import { Observable, catchError, from, of, share, switchMap, toArray } from 'rxjs';
+import { ServerSentEventBase } from '@kbn/sse-utils';
+import {
+  RootCauseAnalysisEvent,
+  runRootCauseAnalysis,
+} from '@kbn/observability-ai-server/root_cause_analysis';
+import { z } from '@kbn/zod';
+import datemath from '@elastic/datemath';
+import { OBSERVABILITY_LOGS_DATA_ACCESS_LOG_SOURCES_ID } from '@kbn/management-settings-ids';
+import { createObservabilityEsClient } from '@kbn/observability-utils-server/es/client/create_observability_es_client';
+import { preconditionFailed } from '@hapi/boom';
+import { createInvestigateAppServerRoute } from '../create_investigate_app_server_route';
+import { investigationRepositoryFactory } from '../../services/investigation_repository';
+
+export const rootCauseAnalysisRoute = createInvestigateAppServerRoute({
+  endpoint: 'POST /internal/observability/investigation/root_cause_analysis',
+  options: {
+    tags: [],
+  },
+  params: z.object({
+    body: z.object({
+      investigationId: z.string(),
+      rangeFrom: z.string(),
+      rangeTo: z.string(),
+      serviceName: z.string(),
+      context: z.string(),
+      connectorId: z.string(),
+      completeInBackground: z.boolean().optional(),
+    }),
+  }),
+  handler: async ({
+    params,
+    plugins,
+    request,
+    context: requestContext,
+    logger,
+  }): Promise<Observable<ServerSentEventBase<'event', { event: RootCauseAnalysisEvent }>>> => {
+    const {
+      body: {
+        investigationId,
+        context,
+        rangeFrom,
+        rangeTo,
+        serviceName,
+        connectorId,
+        completeInBackground,
+      },
+    } = params;
+
+    if (!plugins.observabilityAIAssistant) {
+      throw preconditionFailed('Observability AI Assistant plugin is not available');
+    }
+
+    const start = datemath.parse(rangeFrom)?.valueOf()!;
+    const end = datemath.parse(rangeTo)?.valueOf()!;
+
+    const coreContext = await requestContext.core;
+
+    const coreEsClient = coreContext.elasticsearch.client.asCurrentUser;
+    const soClient = coreContext.savedObjects.client;
+    const uiSettingsClient = coreContext.uiSettings.client;
+
+    const repository = investigationRepositoryFactory({ soClient, logger });
+
+    const esClient = createObservabilityEsClient({
+      client: coreEsClient,
+      logger,
+      plugin: 'investigateApp',
+    });
+
+    const [
+      investigation,
+      rulesClient,
+      alertsClient,
+      inferenceClient,
+      observabilityAIAssistantClient,
+      spaceId = 'default',
+      apmIndices,
+      logSources,
+      sloSummaryIndices,
+    ] = await Promise.all([
+      repository.findById(investigationId),
+      (await plugins.alerting.start()).getRulesClientWithRequest(request),
+      (await plugins.ruleRegistry.start()).getRacClientWithRequest(request),
+      (await plugins.inference.start()).getClient({ request }),
+      plugins
+        .observabilityAIAssistant!.start()
+        .then((observabilityAIAssistantStart) =>
+          observabilityAIAssistantStart.service.getClient({ request, scopes: ['observability'] })
+        ),
+      (await plugins.spaces?.start())?.spacesService.getSpaceId(request),
+      plugins.apmDataAccess.setup.getApmIndices(soClient),
+      uiSettingsClient.get(OBSERVABILITY_LOGS_DATA_ACCESS_LOG_SOURCES_ID) as Promise<string[]>,
+      (await plugins.slo.start()).getSloClientWithRequest(request).getSummaryIndices(),
+    ]);
+
+    const next$ = runRootCauseAnalysis({
+      alertsClient,
+      connectorId,
+      start,
+      end,
+      esClient,
+      inferenceClient,
+      indices: {
+        logs: logSources,
+        traces: [apmIndices.span, apmIndices.error, apmIndices.transaction],
+        sloSummaries: sloSummaryIndices,
+      },
+      rulesClient,
+      observabilityAIAssistantClient,
+      serviceName,
+      spaceId,
+      context,
+      logger,
+    }).pipe(
+      switchMap((event) => {
+        return of({
+          type: 'event' as const,
+          event,
+        });
+      })
+    );
+
+    if (completeInBackground) {
+      const shared$ = next$.pipe(share());
+
+      shared$
+        .pipe(
+          toArray(),
+          catchError(() => {
+            return of();
+          }),
+          switchMap((events) => {
+            return from(
+              repository.save({
+                ...investigation,
+                rootCauseAnalysis: {
+                  events: events.map(({ event }) => event),
+                },
+              })
+            );
+          })
+        )
+        .subscribe({
+          error: (error) => {
+            logger.error(`Failed to update investigation: ${error.message}`);
+            logger.error(error);
+          },
+        });
+
+      return shared$;
+    }
+
+    return next$;
+  },
+});
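The `completeInBackground` branch is the interesting part: `share()` turns the analysis into a multicast stream, so the HTTP response and a persistence pipeline can both consume the same events, and the buffered result is saved even if the browser disconnects. A standalone sketch of that RxJS pattern (stand-in observables and a stand-in `save`, not the route's actual clients):

```ts
import { catchError, from, interval, map, of, share, switchMap, take, toArray } from 'rxjs';

// Stand-in for the analysis stream; share() multicasts it so a second subscriber
// (in the route above, the HTTP response) can read the same events.
const events$ = interval(100).pipe(
  take(3),
  map((i) => ({ event: `analysis-step-${i}` })),
  share()
);

events$
  .pipe(
    toArray(), // buffer every event until the stream completes
    catchError(() => of()), // on error, emit nothing so nothing is persisted
    switchMap((events) => from(save(events))) // stand-in for repository.save(...)
  )
  .subscribe();

async function save(events: Array<{ event: string }>) {
  // write the buffered events somewhere durable
}
```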

@@ -5,11 +5,23 @@
  * 2.0.
  */
 
-import { ObservabilityPluginSetup } from '@kbn/observability-plugin/server';
-import {
+import type { ObservabilityPluginSetup } from '@kbn/observability-plugin/server';
+import type {
   RuleRegistryPluginSetupContract,
   RuleRegistryPluginStartContract,
 } from '@kbn/rule-registry-plugin/server';
+import type { AlertingServerSetup, AlertingServerStart } from '@kbn/alerting-plugin/server/plugin';
+import type { SLOServerStart, SLOServerSetup } from '@kbn/slo-plugin/server';
+import type { InferenceServerStart, InferenceServerSetup } from '@kbn/inference-plugin/server';
+import type { SpacesPluginSetup, SpacesPluginStart } from '@kbn/spaces-plugin/server';
+import type {
+  ApmDataAccessPluginStart,
+  ApmDataAccessPluginSetup,
+} from '@kbn/apm-data-access-plugin/server';
+import type {
+  ObservabilityAIAssistantServerStart,
+  ObservabilityAIAssistantServerSetup,
+} from '@kbn/observability-ai-assistant-plugin/server';
 import { UsageCollectionSetup } from '@kbn/usage-collection-plugin/server';
 
 /* eslint-disable @typescript-eslint/no-empty-interface*/
@@ -19,11 +31,23 @@ export interface ConfigSchema {}
 export interface InvestigateAppSetupDependencies {
   observability: ObservabilityPluginSetup;
   ruleRegistry: RuleRegistryPluginSetupContract;
+  slo: SLOServerSetup;
+  alerting: AlertingServerSetup;
+  inference: InferenceServerSetup;
+  spaces?: SpacesPluginSetup;
+  apmDataAccess: ApmDataAccessPluginSetup;
+  observabilityAIAssistant?: ObservabilityAIAssistantServerSetup;
   usageCollection: UsageCollectionSetup;
 }
 
 export interface InvestigateAppStartDependencies {
   ruleRegistry: RuleRegistryPluginStartContract;
+  slo: SLOServerStart;
+  alerting: AlertingServerStart;
+  inference: InferenceServerStart;
+  spaces?: SpacesPluginStart;
+  apmDataAccess: ApmDataAccessPluginStart;
+  observabilityAIAssistant?: ObservabilityAIAssistantServerStart;
 }
 
 export interface InvestigateAppServerSetup {}

@@ -17,57 +17,67 @@
    ".storybook/**/*.js"
  ],
  "kbn_references": [
    "@kbn/esql",
    "@kbn/core",
    "@kbn/data-views-plugin",
    "@kbn/expressions-plugin",
    "@kbn/kibana-utils-plugin",
    "@kbn/utility-types-jest",
    "@kbn/es-types",
    "@kbn/data-plugin",
    "@kbn/embeddable-plugin",
    "@kbn/unified-search-plugin",
    "@kbn/kibana-react-plugin",
    "@kbn/server-route-repository",
    "@kbn/server-route-repository-client",
    "@kbn/react-kibana-context-theme",
    "@kbn/shared-ux-link-redirect-app",
    "@kbn/kibana-react-plugin",
    "@kbn/i18n",
    "@kbn/embeddable-plugin",
    "@kbn/observability-ai-assistant-plugin",
    "@kbn/lens-plugin",
    "@kbn/esql",
    "@kbn/esql-utils",
    "@kbn/data-plugin",
    "@kbn/es-types",
    "@kbn/field-types",
    "@kbn/expressions-plugin",
    "@kbn/deeplinks-observability",
    "@kbn/logging",
    "@kbn/data-views-plugin",
    "@kbn/observability-shared-plugin",
    "@kbn/config-schema",
    "@kbn/investigate-plugin",
    "@kbn/dataset-quality-plugin",
    "@kbn/utility-types-jest",
    "@kbn/content-management-plugin",
    "@kbn/kibana-utils-plugin",
    "@kbn/visualization-utils",
    "@kbn/unified-search-plugin",
    "@kbn/es-query",
    "@kbn/server-route-repository",
    "@kbn/security-plugin",
    "@kbn/ui-actions-plugin",
    "@kbn/server-route-repository-utils",
    "@kbn/core-saved-objects-server",
    "@kbn/rule-registry-plugin",
    "@kbn/shared-ux-router",
    "@kbn/i18n",
    "@kbn/investigation-shared",
    "@kbn/core-security-common",
    "@kbn/saved-objects-finder-plugin",
    "@kbn/presentation-containers",
    "@kbn/lens-plugin",
    "@kbn/rule-registry-plugin",
    "@kbn/security-plugin",
    "@kbn/rule-data-utils",
    "@kbn/investigate-plugin",
    "@kbn/observability-utils-browser",
    "@kbn/lens-embeddable-utils",
    "@kbn/i18n-react",
    "@kbn/zod",
    "@kbn/observability-plugin",
    "@kbn/licensing-plugin",
    "@kbn/rule-data-utils",
    "@kbn/es-query",
    "@kbn/saved-objects-finder-plugin",
    "@kbn/presentation-containers",
    "@kbn/observability-ai-server",
    "@kbn/charts-plugin",
    "@kbn/observability-shared-plugin",
    "@kbn/core-security-common",
    "@kbn/deeplinks-observability",
    "@kbn/logging",
    "@kbn/esql-utils",
    "@kbn/observability-ai-assistant-plugin",
    "@kbn/observability-ai-assistant-app-plugin",
    "@kbn/content-management-plugin",
    "@kbn/dataset-quality-plugin",
    "@kbn/ui-actions-plugin",
    "@kbn/field-types",
    "@kbn/entities-schema",
    "@kbn/core-elasticsearch-server",
    "@kbn/observability-plugin",
    "@kbn/config-schema",
    "@kbn/visualization-utils",
    "@kbn/usage-collection-plugin",
    "@kbn/calculate-auto",
    "@kbn/ml-random-sampler-utils",
    "@kbn/charts-plugin",
    "@kbn/observability-utils-browser",
    "@kbn/usage-collection-plugin",
    "@kbn/zod",
    "@kbn/inference-common",
    "@kbn/core-elasticsearch-server",
    "@kbn/sse-utils",
    "@kbn/management-settings-ids",
    "@kbn/observability-utils-server",
    "@kbn/licensing-plugin",
    "@kbn/core-saved-objects-server",
    "@kbn/alerting-plugin",
    "@kbn/slo-plugin",
    "@kbn/inference-plugin",
    "@kbn/spaces-plugin",
    "@kbn/apm-data-access-plugin",
  ],
}

@@ -52,6 +52,7 @@ export function convertMessagesForInference(messages: Message[]): InferenceMessa
   }
 
   inferenceMessages.push({
+    name: message.message.name!,
     role: InferenceMessageRole.Tool,
    response: JSON.parse(message.message.content ?? '{}'),
    toolCallId: toolCallRequest.toolCalls![0].toolCallId,
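The single added line threads the tool's function name through to the inference layer alongside the call id. The resulting tool-response message looks roughly like this (a sketch; the example values are invented, and `InferenceMessageRole.Tool` is the role enum already imported in that file):

```ts
// Sketch of the object pushed above; values are illustrative.
const toolResponseMessage = {
  name: 'my_tool', // the name of the tool that produced this response (the new field)
  role: 'tool', // i.e. InferenceMessageRole.Tool
  response: { ok: true }, // JSON.parse of message.message.content
  toolCallId: 'call_1', // correlates the response with its originating tool call
};
```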
Some files were not shown because too many files have changed in this diff.