[8.x] [RCA] AI-assisted root cause analysis (#197200) (#203767)

# Backport

This will backport the following commits from `main` to `8.x`:
- [[RCA] AI-assisted root cause analysis
(#197200)](https://github.com/elastic/kibana/pull/197200)

<!--- Backport version: 7.3.2 -->

### Questions?
Please refer to the [Backport tool
documentation](https://github.com/sqren/backport)

<!--BACKPORT {commits} BACKPORT-->
Dario Gieselaar 2024-12-12 15:00:12 +01:00 committed by GitHub
parent 10e01b4be4
commit b3ba62a972
144 changed files with 27287 additions and 358 deletions


@@ -919,6 +919,7 @@ module.exports = {
'x-pack/plugins/observability_solution/exploratory_view/**/*.{js,mjs,ts,tsx}',
'x-pack/plugins/observability_solution/ux/**/*.{js,mjs,ts,tsx}',
'x-pack/plugins/observability_solution/slo/**/*.{js,mjs,ts,tsx}',
'x-pack/packages/observability/**/*.{js,mjs,ts,tsx}',
],
rules: {
'no-console': ['warn', { allow: ['error'] }],
@@ -938,6 +939,7 @@ module.exports = {
'x-pack/plugins/observability_solution/observability/**/*.stories.*',
'x-pack/plugins/observability_solution/exploratory_view/**/*.stories.*',
'x-pack/plugins/observability_solution/slo/**/*.stories.*',
'x-pack/packages/observability/**/*.{js,mjs,ts,tsx}',
],
rules: {
'react/function-component-definition': [

.github/CODEOWNERS

@@ -662,6 +662,8 @@ packages/kbn-object-versioning-utils @elastic/appex-sharedux
x-pack/plugins/observability_solution/observability_ai_assistant_app @elastic/obs-ai-assistant
x-pack/plugins/observability_solution/observability_ai_assistant_management @elastic/obs-ai-assistant
x-pack/plugins/observability_solution/observability_ai_assistant @elastic/obs-ai-assistant
x-pack/packages/observability/observability_ai/observability_ai_common @elastic/obs-ai-assistant
x-pack/packages/observability/observability_ai/observability_ai_server @elastic/obs-ai-assistant
x-pack/packages/observability/alert_details @elastic/obs-ux-management-team
x-pack/packages/observability/alerting_rule_utils @elastic/obs-ux-management-team
x-pack/packages/observability/alerting_test_data @elastic/obs-ux-management-team


@@ -693,6 +693,8 @@
"@kbn/observability-ai-assistant-app-plugin": "link:x-pack/plugins/observability_solution/observability_ai_assistant_app",
"@kbn/observability-ai-assistant-management-plugin": "link:x-pack/plugins/observability_solution/observability_ai_assistant_management",
"@kbn/observability-ai-assistant-plugin": "link:x-pack/plugins/observability_solution/observability_ai_assistant",
"@kbn/observability-ai-common": "link:x-pack/packages/observability/observability_ai/observability_ai_common",
"@kbn/observability-ai-server": "link:x-pack/packages/observability/observability_ai/observability_ai_server",
"@kbn/observability-alert-details": "link:x-pack/packages/observability/alert_details",
"@kbn/observability-alerting-rule-utils": "link:x-pack/packages/observability/alerting_rule_utils",
"@kbn/observability-alerting-test-data": "link:x-pack/packages/observability/alerting_test_data",
@@ -1143,6 +1145,7 @@
"fnv-plus": "^1.3.1",
"formik": "^2.4.6",
"fp-ts": "^2.3.1",
"fuse.js": "^7.0.0",
"get-port": "^5.0.0",
"getopts": "^2.2.5",
"getos": "^3.1.0",


@@ -13,6 +13,7 @@ export type {
SearchHit,
ESSearchResponse,
ESSearchRequest,
ESSearchRequestWithoutBody,
ESSourceOptions,
InferSearchResponseOf,
AggregationResultOf,


@@ -8,6 +8,7 @@
*/
import * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import * as estypesWithoutBody from '@elastic/elasticsearch/lib/api/types';
import type {
Field,
QueryDslFieldAndFormat,
@@ -26,6 +27,7 @@ import {
export type ESFilter = estypes.QueryDslQueryContainer;
export type ESSearchRequest = estypes.SearchRequest;
export type ESSearchRequestWithoutBody = estypesWithoutBody.SearchRequest;
export type AggregationOptionsByType = Required<estypes.AggregationsAggregationContainer>;
// Typings for Elasticsearch queries and aggregations. These are intended to be
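For context, the two `SearchRequest` typings imported above differ only in where the search body lives; a minimal sketch (the types come straight from the `@elastic/elasticsearch` v8 client, as in the imports in this hunk):

```ts
import type { SearchRequest as WithBodyKey } from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import type { SearchRequest as WithoutBodyKey } from '@elastic/elasticsearch/lib/api/types';

// Legacy shape: the query is nested under `body`.
const legacy: WithBodyKey = { index: 'logs-*', body: { query: { match_all: {} } } };

// Body-less shape: the same fields are inlined at the top level.
const inline: WithoutBodyKey = { index: 'logs-*', query: { match_all: {} } };
```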


@@ -23,20 +23,15 @@ type InvalidAggregationRequest = unknown;
// Union keys are not included in keyof, but extends iterates over the types in a union.
type ValidAggregationKeysOf<T extends Record<string, any>> = T extends T ? keyof T : never;
type KeyOfSource<T> = Record<
keyof T,
(T extends Record<string, { terms: { missing_bucket: true } }> ? null : never) | string | number
>;
type KeyOfSource<T> = {
[key in keyof T]:
| (T[key] extends Record<string, { terms: { missing_bucket: true } }> ? null : never)
| string
| number;
};
type KeysOfSources<T extends any[]> = T extends [any]
? KeyOfSource<T[0]>
: T extends [any, any]
? KeyOfSource<T[0]> & KeyOfSource<T[1]>
: T extends [any, any, any]
? KeyOfSource<T[0]> & KeyOfSource<T[1]> & KeyOfSource<T[2]>
: T extends [any, any, any, any]
? KeyOfSource<T[0]> & KeyOfSource<T[1]> & KeyOfSource<T[2]> & KeyOfSource<T[3]>
: Record<string, null | string | number>;
// convert to intersection to be able to get all the keys
type KeysOfSources<T extends any[]> = UnionToIntersection<KeyOfSource<ValuesType<Pick<T, number>>>>;
type CompositeKeysOf<TAggregationContainer extends AggregationsAggregationContainer> =
TAggregationContainer extends {
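For context, the rewrite above swaps the hand-unrolled tuple cases for the `UnionToIntersection`/`ValuesType` helpers from the `utility-types` package. A minimal sketch of why the intersection trick recovers every key; the local alias below is illustrative, not the package source:

```ts
// Illustrative re-definition of utility-types' UnionToIntersection.
type UnionToIntersection<U> = (U extends any ? (x: U) => void : never) extends (
  x: infer I
) => void
  ? I
  : never;

type A = { 'service.name': string | null };
type B = { 'host.name': string | null };

// ValuesType<Pick<[A, B], number>> yields A | B, a union of the tuple's
// element types...
type Union = A | B;
// ...and the intersection merges all composite keys into one record:
type AllKeys = UnionToIntersection<Union>; // A & B
```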


@@ -24,6 +24,9 @@ const updateInvestigationParamsSchema = z.object({
}),
tags: z.array(z.string()),
externalIncidentUrl: z.string().nullable(),
rootCauseAnalysis: z.object({
events: z.array(z.any()),
}),
})
.partial(),
});


@@ -35,6 +35,11 @@ const investigationSchema = z.object({
notes: z.array(investigationNoteSchema),
items: z.array(investigationItemSchema),
externalIncidentUrl: z.string().nullable(),
rootCauseAnalysis: z
.object({
events: z.array(z.any()),
})
.optional(),
});
type Status = z.infer<typeof statusSchema>;
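A small sketch of how the new optional field behaves at the schema boundary (reduced, hypothetical schema containing only the shape added in this hunk):

```ts
import { z } from 'zod';

const schema = z.object({
  rootCauseAnalysis: z
    .object({ events: z.array(z.any()) })
    .optional(),
});

schema.parse({}); // ok: the field may be absent on existing investigations
schema.parse({ rootCauseAnalysis: { events: [{ type: 'data' }] } }); // ok
```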


@@ -98,8 +98,15 @@ export function registerRoutes<TDependencies extends Record<string, any>>({
if (isKibanaResponse(result)) {
return result;
} else if (isObservable(result)) {
const controller = new AbortController();
request.events.aborted$.subscribe(() => {
controller.abort();
});
return response.ok({
body: observableIntoEventSourceStream(result as Observable<ServerSentEvent>),
body: observableIntoEventSourceStream(result as Observable<ServerSentEvent>, {
logger,
signal: controller.signal,
}),
});
} else {
const body = result || {};


@@ -0,0 +1,198 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
import { Logger } from '@kbn/logging';
import { observableIntoEventSourceStream } from './observable_into_event_source_stream';
import { PassThrough } from 'node:stream';
import { Subject } from 'rxjs';
import { ServerSentEvent, ServerSentEventType } from '@kbn/sse-utils/src/events';
import {
ServerSentEventErrorCode,
createSSEInternalError,
createSSERequestError,
} from '@kbn/sse-utils/src/errors';
describe('observableIntoEventSourceStream', () => {
let logger: jest.Mocked<Logger>;
let controller: AbortController;
let stream: PassThrough;
let source$: Subject<ServerSentEvent>;
let data: string[];
beforeEach(() => {
jest.useFakeTimers();
logger = {
debug: jest.fn(),
error: jest.fn(),
} as unknown as jest.Mocked<Logger>;
controller = new AbortController();
source$ = new Subject();
data = [];
stream = observableIntoEventSourceStream(source$, { logger, signal: controller.signal });
stream.on('data', (chunk) => {
data.push(chunk.toString());
});
});
afterEach(() => {
jest.clearAllTimers();
});
it('writes events into the stream in SSE format', () => {
source$.next({ type: ServerSentEventType.data, data: { foo: 'bar' } });
source$.complete();
jest.runAllTimers();
expect(data).toEqual(['event: data\ndata: {"data":{"foo":"bar"}}\n\n']);
});
it('handles SSE errors', () => {
const sseError = createSSEInternalError('Invalid input');
source$.error(sseError);
jest.runAllTimers();
expect(logger.error).toHaveBeenCalledWith(sseError);
expect(logger.debug).toHaveBeenCalled();
const debugFn = logger.debug.mock.calls[0][0] as () => string;
const loggedError = JSON.parse(debugFn());
expect(loggedError).toEqual({
type: 'error',
error: {
code: ServerSentEventErrorCode.internalError,
message: 'Invalid input',
meta: {},
},
});
expect(data).toEqual([
`event: error\ndata: ${JSON.stringify({
error: {
code: ServerSentEventErrorCode.internalError,
message: 'Invalid input',
meta: {},
},
})}\n\n`,
]);
});
it('handles SSE errors with metadata', () => {
const sseError = createSSERequestError('Invalid request', 400);
source$.error(sseError);
jest.runAllTimers();
expect(logger.error).toHaveBeenCalledWith(sseError);
expect(logger.debug).toHaveBeenCalled();
const debugFn = logger.debug.mock.calls[0][0] as () => string;
const loggedError = JSON.parse(debugFn());
expect(loggedError).toEqual({
type: 'error',
error: {
code: ServerSentEventErrorCode.requestError,
message: 'Invalid request',
meta: {
status: 400,
},
},
});
expect(data).toEqual([
`event: error\ndata: ${JSON.stringify({
error: {
code: ServerSentEventErrorCode.requestError,
message: 'Invalid request',
meta: {
status: 400,
},
},
})}\n\n`,
]);
});
it('handles non-SSE errors', () => {
const error = new Error('Non-SSE Error');
source$.error(error);
jest.runAllTimers();
expect(logger.error).toHaveBeenCalledWith(error);
expect(data).toEqual([
`event: error\ndata: ${JSON.stringify({
error: {
code: ServerSentEventErrorCode.internalError,
message: 'Non-SSE Error',
},
})}\n\n`,
]);
});
it('should send keep-alive comments every 10 seconds', () => {
jest.advanceTimersByTime(10000);
expect(data).toContain(': keep-alive');
jest.advanceTimersByTime(10000);
expect(data.filter((d) => d === ': keep-alive')).toHaveLength(2);
});
describe('without fake timers', () => {
beforeEach(() => {
jest.useFakeTimers({ doNotFake: ['nextTick'] });
});
it('should end the stream when the observable completes', async () => {
jest.useFakeTimers({ doNotFake: ['nextTick'] });
const endSpy = jest.fn();
stream.on('end', endSpy);
source$.complete();
await new Promise((resolve) => process.nextTick(resolve));
expect(endSpy).toHaveBeenCalled();
});
it('should end stream when signal is aborted', async () => {
const endSpy = jest.fn();
stream.on('end', endSpy);
// Emit some data
source$.next({ type: ServerSentEventType.data, data: { initial: 'data' } });
// Abort the signal
controller.abort();
// Emit more data after abort
source$.next({ type: ServerSentEventType.data, data: { after: 'abort' } });
await new Promise((resolve) => process.nextTick(resolve));
expect(endSpy).toHaveBeenCalled();
// Data after abort should not be received
expect(data).toEqual([
`event: data\ndata: ${JSON.stringify({ data: { initial: 'data' } })}\n\n`,
]);
});
afterEach(() => {
jest.useFakeTimers();
});
});
});


@@ -7,12 +7,51 @@
* License v3.0 only", or the "Server Side Public License, v 1".
*/
import { map, Observable } from 'rxjs';
import { Logger } from '@kbn/logging';
import {
isSSEError,
ServerSentErrorEvent,
ServerSentEventErrorCode,
} from '@kbn/sse-utils/src/errors';
import { ServerSentEvent, ServerSentEventType } from '@kbn/sse-utils/src/events';
import { catchError, map, Observable, of } from 'rxjs';
import { PassThrough } from 'stream';
import { ServerSentEvent } from '@kbn/sse-utils';
export function observableIntoEventSourceStream(source$: Observable<ServerSentEvent>): PassThrough {
const withSerializedEvents$ = source$.pipe(
export function observableIntoEventSourceStream(
source$: Observable<ServerSentEvent>,
{
logger,
signal,
}: {
logger: Pick<Logger, 'debug' | 'error'>;
signal: AbortSignal;
}
) {
const withSerializedErrors$ = source$.pipe(
catchError((error): Observable<ServerSentErrorEvent> => {
if (isSSEError(error)) {
logger.error(error);
logger.debug(() => JSON.stringify(error));
return of({
type: ServerSentEventType.error,
error: {
code: error.code,
message: error.message,
meta: error.meta,
},
});
}
logger.error(error);
return of({
type: ServerSentEventType.error,
error: {
code: ServerSentEventErrorCode.internalError,
message: error.message as string,
},
});
}),
map((event) => {
const { type, ...rest } = event;
return `event: ${type}\ndata: ${JSON.stringify(rest)}\n\n`;
@@ -21,18 +60,38 @@ export function observableIntoEventSourceStream(source$: Observable<ServerSentEv
const stream = new PassThrough();
withSerializedEvents$.subscribe({
const intervalId = setInterval(() => {
// `:` denotes a comment - this is to keep the connection open
// it will be ignored by the SSE parser on the client
stream.write(': keep-alive');
}, 10000);
const subscription = withSerializedErrors$.subscribe({
next: (line) => {
stream.write(line);
},
complete: () => {
stream.end();
clearTimeout(intervalId);
},
error: (error) => {
stream.write(`event: error\ndata: ${JSON.stringify(error)}\n\n`);
clearTimeout(intervalId);
stream.write(
`event:error\ndata: ${JSON.stringify({
error: {
code: ServerSentEventErrorCode.internalError,
message: error.message,
},
})}\n\n`
);
stream.end();
},
});
signal.addEventListener('abort', () => {
subscription.unsubscribe();
stream.end();
});
return stream;
}
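To illustrate the wire format this produces, a minimal browser-side consumer (the endpoint path is hypothetical; `EventSource` is the standard browser API, and it silently skips the `: keep-alive` comment lines):

```ts
// Hypothetical route that returns observableIntoEventSourceStream(...).
const source = new EventSource('/internal/observability/rca/stream');

// Frames serialized as `event: data\ndata: {...}\n\n` arrive as 'data' events.
source.addEventListener('data', (event) => {
  const payload = JSON.parse((event as MessageEvent).data);
  console.log('received', payload);
});

// Serialized failures arrive as `event: error` frames before the stream ends;
// the instanceof check separates them from plain connection errors.
source.addEventListener('error', (event) => {
  if (event instanceof MessageEvent) {
    console.error('stream error', JSON.parse(event.data));
  }
});
```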


@@ -15,5 +15,6 @@
],
"kbn_references": [
"@kbn/sse-utils",
"@kbn/logging",
]
}


@@ -21,7 +21,8 @@ function myRequestHandler(
data: {
anyData: {},
},
})
}),
logger
),
});
}


@@ -1318,6 +1318,10 @@
"@kbn/observability-ai-assistant-management-plugin/*": ["x-pack/plugins/observability_solution/observability_ai_assistant_management/*"],
"@kbn/observability-ai-assistant-plugin": ["x-pack/plugins/observability_solution/observability_ai_assistant"],
"@kbn/observability-ai-assistant-plugin/*": ["x-pack/plugins/observability_solution/observability_ai_assistant/*"],
"@kbn/observability-ai-common": ["x-pack/packages/observability/observability_ai/observability_ai_common"],
"@kbn/observability-ai-common/*": ["x-pack/packages/observability/observability_ai/observability_ai_common/*"],
"@kbn/observability-ai-server": ["x-pack/packages/observability/observability_ai/observability_ai_server"],
"@kbn/observability-ai-server/*": ["x-pack/packages/observability/observability_ai/observability_ai_server/*"],
"@kbn/observability-alert-details": ["x-pack/packages/observability/alert_details"],
"@kbn/observability-alert-details/*": ["x-pack/packages/observability/alert_details/*"],
"@kbn/observability-alerting-rule-utils": ["x-pack/packages/observability/alerting_rule_utils"],


@@ -0,0 +1,15 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
module.exports = {
preset: '@kbn/test',
rootDir: '../../../../..',
roots: [
'<rootDir>/x-pack/packages/observability/observability_ai/observability_ai_common',
'<rootDir>/x-pack/packages/observability/observability_ai/observability_ai_server',
],
};


@@ -0,0 +1,7 @@
{
"type": "shared-common",
"id": "@kbn/observability-ai-common",
"owner": "@elastic/obs-ai-assistant",
"group": "observability",
"visibility": "private"
}


@@ -0,0 +1,6 @@
{
"name": "@kbn/observability-ai-common",
"private": true,
"version": "1.0.0",
"license": "Elastic License 2.0"
}


@@ -0,0 +1,12 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
export {
RCA_END_PROCESS_TOOL_NAME,
RCA_INVESTIGATE_ENTITY_TOOL_NAME,
RCA_OBSERVE_TOOL_NAME,
} from './tool_names';


@@ -0,0 +1,10 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
export const RCA_OBSERVE_TOOL_NAME = 'observe';
export const RCA_END_PROCESS_TOOL_NAME = 'endProcessAndWriteReport';
export const RCA_INVESTIGATE_ENTITY_TOOL_NAME = 'investigateEntity';
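These shared names later key the tool definitions in the server package (see `RCA_TOOLS` in `run_root_cause_analysis` below). A hypothetical sketch of the pattern; the real schemas live in the RCA server package:

```ts
import { RCA_OBSERVE_TOOL_NAME } from '@kbn/observability-ai-common/root_cause_analysis';

// Hypothetical tool map keyed by the shared constant, so client and server
// agree on tool names without importing server code.
const tools = {
  [RCA_OBSERVE_TOOL_NAME]: {
    description: 'Write an observation based on the evidence collected so far',
    schema: { type: 'object', properties: {} },
  },
} as const;
```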


@@ -0,0 +1,20 @@
{
"extends": "../../../../../tsconfig.base.json",
"compilerOptions": {
"outDir": "target/types",
"types": [
"jest",
"node",
"react"
]
},
"include": [
"**/*.ts",
"**/*.tsx",
],
"exclude": [
"target/**/*"
],
"kbn_references": [
]
}


@@ -0,0 +1,12 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
module.exports = {
preset: '@kbn/test',
rootDir: '../../../../..',
roots: ['<rootDir>/x-pack/packages/observability/observability_ai/observability_ai_server'],
};


@@ -0,0 +1,7 @@
{
"type": "shared-server",
"id": "@kbn/observability-ai-server",
"owner": "@elastic/obs-ai-assistant",
"group": "observability",
"visibility": "private"
}


@@ -0,0 +1,6 @@
{
"name": "@kbn/observability-ai-server",
"private": true,
"version": "1.0.0",
"license": "Elastic License 2.0"
}


@@ -0,0 +1,51 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { from, Observable, of, switchMap } from 'rxjs';
import { RCA_END_PROCESS_TOOL_NAME } from '@kbn/observability-ai-common/root_cause_analysis';
import { AssistantMessage, MessageRole } from '@kbn/inference-common';
import { writeFinalReport } from './tasks/write_final_report';
import { EndProcessToolMessage, RootCauseAnalysisContext } from './types';
import { generateSignificantEventsTimeline } from './tasks/generate_timeline';
import { EMPTY_ASSISTANT_MESSAGE } from './empty_assistant_message';
export function callEndRcaProcessTool({
rcaContext,
toolCallId,
}: {
rcaContext: RootCauseAnalysisContext;
toolCallId: string;
}): Observable<EndProcessToolMessage | AssistantMessage> {
return from(
writeFinalReport({
rcaContext,
})
).pipe(
switchMap((report) => {
return from(
generateSignificantEventsTimeline({
rcaContext,
report,
}).then((timeline) => {
return { timeline, report };
})
);
}),
switchMap(({ report, timeline }) => {
const toolMessage: EndProcessToolMessage = {
name: RCA_END_PROCESS_TOOL_NAME,
role: MessageRole.Tool,
toolCallId,
response: {
report,
timeline,
},
};
return of(toolMessage, EMPTY_ASSISTANT_MESSAGE);
})
);
}


@@ -0,0 +1,80 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { from, Observable, of, switchMap } from 'rxjs';
import { MessageRole } from '@kbn/inference-common';
import { RCA_INVESTIGATE_ENTITY_TOOL_NAME } from '@kbn/observability-ai-common/root_cause_analysis';
import { InvestigateEntityToolMessage, RootCauseAnalysisContext, ToolErrorMessage } from './types';
import { investigateEntity } from './tasks/investigate_entity';
import { formatEntity } from './util/format_entity';
export function callInvestigateEntityTool({
field,
value,
context,
toolCallId,
rcaContext,
}: {
field: string;
value: string;
context: string;
toolCallId: string;
rcaContext: RootCauseAnalysisContext;
}): Observable<InvestigateEntityToolMessage | ToolErrorMessage> {
const nextEntity = {
[field]: value,
};
return from(
investigateEntity({
rcaContext,
entity: nextEntity,
context,
})
).pipe(
switchMap((entityInvestigation) => {
if (!entityInvestigation) {
const entityNotFoundToolMessage: ToolErrorMessage = {
name: 'error',
role: MessageRole.Tool,
response: {
error: {
message: `Entity ${formatEntity(nextEntity)} not found, have
you verified it exists and if the field and value you are using
are correct?`,
},
},
toolCallId,
};
return of(entityNotFoundToolMessage);
}
const {
attachments,
relatedEntities,
entity: investigatedEntity,
summary,
} = entityInvestigation;
const toolMessage: InvestigateEntityToolMessage = {
name: RCA_INVESTIGATE_ENTITY_TOOL_NAME,
role: MessageRole.Tool as const,
toolCallId,
response: {
entity: investigatedEntity,
relatedEntities,
summary,
},
data: {
attachments,
},
};
return of(toolMessage);
})
);
}


@@ -0,0 +1,91 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { AssistantMessage, MessageRole } from '@kbn/inference-common';
import {
RCA_INVESTIGATE_ENTITY_TOOL_NAME,
RCA_OBSERVE_TOOL_NAME,
} from '@kbn/observability-ai-common/root_cause_analysis';
import { compact, findLast } from 'lodash';
import { from, Observable, of, switchMap } from 'rxjs';
import { observeInvestigationResults } from './tasks/observe_investigation_results';
import {
InvestigateEntityToolMessage,
ObservationToolMessage,
RootCauseAnalysisContext,
RootCauseAnalysisEvent,
} from './types';
export function callObserveTool({
rcaContext,
toolCallId,
}: {
rcaContext: RootCauseAnalysisContext;
toolCallId: string;
}): Observable<ObservationToolMessage> {
const { events } = rcaContext;
const lastAssistantMessage = findLast(
events.slice(0, -1),
(event): event is Extract<RootCauseAnalysisEvent, AssistantMessage> =>
event.role === MessageRole.Assistant
);
const toolMessagesByToolCallId = Object.fromEntries(
compact(
events.map((message) =>
'toolCallId' in message &&
(message.name === RCA_INVESTIGATE_ENTITY_TOOL_NAME || message.name === 'error')
? [message.toolCallId, message]
: undefined
)
)
);
const investigationToolMessages =
lastAssistantMessage && lastAssistantMessage.toolCalls
? compact(
lastAssistantMessage.toolCalls.map((investigateEntityToolCall) => {
if (investigateEntityToolCall.function.name !== RCA_INVESTIGATE_ENTITY_TOOL_NAME) {
return undefined;
}
return {
toolCall: investigateEntityToolCall,
toolResponse: toolMessagesByToolCallId[investigateEntityToolCall.toolCallId],
};
})
)
: [];
const investigations = investigationToolMessages
.map((toolMessage) => toolMessage.toolResponse)
.filter(
(toolResponse): toolResponse is InvestigateEntityToolMessage =>
toolResponse.name === RCA_INVESTIGATE_ENTITY_TOOL_NAME
)
.map((toolResponse) => ({ ...toolResponse.data, ...toolResponse.response }));
return from(
observeInvestigationResults({
rcaContext,
investigations,
})
).pipe(
switchMap((summary) => {
const observationToolMessage: ObservationToolMessage = {
name: RCA_OBSERVE_TOOL_NAME,
response: {
content: summary.content,
},
data: summary,
role: MessageRole.Tool,
toolCallId,
};
return of(observationToolMessage);
})
);
}


@@ -0,0 +1,15 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { AssistantMessage, MessageRole } from '@kbn/inference-common';
import { RootCauseAnalysisEvent } from './types';
export const EMPTY_ASSISTANT_MESSAGE: Extract<RootCauseAnalysisEvent, AssistantMessage> = {
content: '',
role: MessageRole.Assistant,
toolCalls: [],
};


@@ -0,0 +1,20 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
export type {
RootCauseAnalysisEvent,
InvestigateEntityToolMessage,
EndProcessToolMessage,
ObservationToolMessage,
RootCauseAnalysisToolMessage,
ToolErrorMessage,
RootCauseAnalysisToolRequest,
} from './types';
export type { SignificantEventsTimeline, SignificantEvent } from './tasks/generate_timeline';
export type { EntityInvestigation } from './tasks/investigate_entity';
export { runRootCauseAnalysis } from './run_root_cause_analysis';


@@ -0,0 +1,345 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
export const RCA_SYSTEM_PROMPT_BASE = `You are a helpful assistant for Elastic Observability.
You are a distinguished SRE, who has an established career, working in both
small shops and FAANG-level companies. You have worked with Elasticsearch
since the beginning and expertly use it in your analysis of incidents.
You use an evidence-based strategy to determine the root cause of
an incident. You thoroughly analyze Observability data. You use your
understanding of different architectures like microservices, monoliths,
event-driven systems, and environments like Kubernetes to discover
patterns and correlations in the data ingested into the user's system.
Your sizable experience with monitoring software systems has taught
you how to investigate issues and correlate symptoms of the investigated
service with its dependencies.
## Capabilities
You are highly skilled at inspecting logs, traces, alerts, and SLOs to uncover
the root cause of incidents, with a special emphasis on detecting log patterns
that reveal system behavior. You can identify related entities, such as upstream
services or the specific pod a service is running on, by searching through logs
and traces for relationships using metadata like IP addresses, session IDs, or
distributed tracing data. While you can analyze alerts and SLO-derived metrics,
you do not directly analyze other system metrics, inspect files, or execute
commands that modify the system.
## Non-capabilities
You lack the capabilities to analyze metrics or connect to external systems.`;
export const RCA_PROMPT_ENTITIES = `# Entities
In an Observability system, entities are distinct components or resources within
the infrastructure, each representing points of interest for monitoring and
troubleshooting. These entities form the backbone of log-based analysis and
allow teams to track behavior, detect anomalies, and investigate issues across
different layers of the system. Here's a breakdown of common entities in
observability:
1. Services: Core units of functionality in an application ecosystem,
representing individual processes or applications (e.g., user-authentication,
payment processing). Services typically expose APIs or endpoints, and logs from
these entities often capture requests, responses, and error events, which are
critical for understanding application behavior.
2. Kubernetes (K8s) Entities:
- Pods: The smallest deployable units in Kubernetes, usually containing one
or more containers. Logs from pods provide insight into container operations,
errors, and application states.
- Namespaces: Logical groupings within a cluster for organizing and isolating
resources, helping in filtering logs by domain or responsibility.
- Nodes: Worker machines (either physical or virtual) where pods run. Node
logs often cover hardware resource events, errors, and other system-level events
relevant to pod health and performance.
- Deployments and ReplicaSets: Define and manage the desired state of pod
replication and rolling updates. Logs from these components can reveal changes
in application versions, scaling events, and configuration updates.
3. Virtual Machines (VMs): Virtualized computing resources that generate
operating system-level logs capturing events such as application crashes,
network issues, and OS-related errors.
4. Applications: Software systems or packages running across the infrastructure,
which may encompass multiple services. Logs from applications track user flows,
application states, and error messages, providing context for user interactions
and system events.
5. Serverless Functions (e.g., AWS Lambda): Code executions triggered by
specific events. Logs from serverless functions capture invocation details,
execution paths, and error traces, which are useful for understanding specific
function behaviors and pinpointing execution anomalies.
6. Databases and Data Stores: Includes SQL/NoSQL databases, caches, and storage
solutions. Logs from these entities cover query executions, connection issues,
and transaction errors, essential for tracking data layer issues.
7. Containers: Portable environments running individual services or processes.
Container logs capture application and system events within the containerized
environment, helping track process-level errors and status changes.
8. Load Balancers and API Gateways: Components responsible for managing and
routing traffic. Logs from these entities include request paths, status codes,
and errors encountered, which can indicate connectivity issues or
misconfigurations.
9. Networking Components: Entities like virtual private clouds (VPCs),
firewalls, VPNs, and network interfaces. Logs from these components track
traffic flows, connectivity issues, and security events, crucial for identifying
network-related anomalies.
10. Clusters and Regions: Groupings of infrastructure either physically or
logically, such as across data centers or cloud regions. Cluster and region logs
help capture high-level events and error messages, useful for understanding
system-wide issues and region-specific disruptions.
Each of these entities is typically identified by fields such as
\`service.name\`, \`kubernetes.pod.name\`, \`container.id\`, or similar fields
in log records. Observability systems use these identifiers to connect entities,
creating a map of relationships and dependencies that helps teams diagnose
issues, understand cross-entity impacts, and uncover root causes in distributed
architectures.`;
export const RCA_PROMPT_DEPENDENCIES = `## Understanding the Flow: Upstream vs. Downstream
- Upstream dependencies: These are the services that your service
depends on. They supply data, perform tasks, or provide resources that
your service consumes.
- Downstream dependencies: These are the services that depend on your
service. They consume the data or resources your service generates.
When diagnosing issues, distinguishing the direction of dependency can
clarify whether a problem originates from your service's reliance on an
external input or whether your service is causing issues for other systems.
---
## When to Investigate Upstream Dependencies
Upstream issues typically occur when your service is failing due to problems
with the responses it receives from external systems.
1. Timeouts and Latency
- Symptoms: Slow response times, retries, or timeouts.
- Errors: HTTP 504, retrying connection, exceeded timeout threshold.
- Focus: Check the performance and availability of upstream services
(e.g., APIs, databases) and network latency.
2. **Data Integrity Issues**
- Symptoms: Inconsistent or corrupted data.
- Errors: unexpected data format, deserialization errors.
- Focus: Verify data received from upstream services, and investigate
schema or data format changes.
3. Connection Failures
- Symptoms: Your service cannot connect to upstream services.
- Errors: DNS lookup failed, connection refused, socket timeout.
- Focus: Check upstream service health, DNS, and networking components.
4. **Authentication/Authorization Failures**
- Symptoms: Failed access to upstream resources.
- Errors: 401 Unauthorized, 403 Forbidden, token issues.
- Focus: Validate credentials or tokens and investigate upstream access
policies.
---
## When to Investigate Downstream Dependencies
Downstream issues occur when your service is functioning but its outputs cause
failures in other services that depend on it.
1. Data or API Response Issues
- Symptoms: Downstream services receive bad or invalid data.
- Errors: data type mismatch, invalid JSON format.
- Focus: Ensure your service is returning correct data and check for API
changes.
2. **Rate-Limiting and Resource Exhaustion**
- Symptoms: Downstream services are overwhelmed.
- Errors: 429 Too Many Requests, throttling or resource exhaustion.
- Focus: Check your service's request rates and resource usage (e.g., memory, CPU).
3. Unexpected Behavior or Regression
- Symptoms: Downstream failures after a recent deployment.
- Errors: New downstream errors after your service changes.
- Focus: Review recent updates, API contracts, or integration points.
4. Eventual Consistency or Queue Backlogs
- Symptoms: Delayed processing in downstream systems.
- Errors: message queue full, backlog warnings.
- Focus: Check event production rates and queue statuses in downstream services.`;
export const RCA_PROMPT_CHANGES = `## Reasoning about Correlating Changes in Incident Investigations
In a root cause analysis, understanding the types and timing of changes is key
to linking symptoms with underlying causes. Changes can broadly be classified
into **symptomatic changes** (indicators of system issues like elevated error
rates or degraded throughput) and **system changes** (events that modify system
configuration or structure, such as scale-downs, new version rollouts, or
significant configuration adjustments). By correlating these changes, we can
assess whether observed symptoms are likely related to specific system
modifications.
### Identifying Correlations Between Symptomatic and System Changes
When investigating a sudden issue, such as a 5x increase in latency, it's
essential to evaluate both the **timing** and **nature** of associated changes
in upstream dependencies, resource utilization, and configuration events. For
instance:
- Consistent Symptomatic Behavior: If an upstream dependency exhibits a
similar, sustained latency spike around the same time and shows log entries
indicating CPU throttling, this would suggest a correlated, persistent issue
that may directly impact the observed symptom. A scale-down event preceding the
latency increase might indicate that reduced resources are stressing the
dependency.
- Transient vs. Persistent Issues: Another upstream dependency that
experiences a brief latency increase but recovers quickly is less likely
related. Short-lived changes that self-correct without intervention typically
have different root causes or may be unrelated noise.
### Types of Changes to Consider in Correlation
1. Log Pattern Changes: A shift in log patterns, especially around error
levels, provides significant insight. If there's an increase in critical or
warning log patterns for a dependency during the latency spike, it could
indicate that the issue stems from this entity. Compare these log patterns to
past behavior to assess whether they represent an anomaly that might warrant
further investigation.
2. Event-Driven System Changes:
- Scale Events: Scale-up or scale-down events can directly impact
performance. If a latency increase aligns with a scale-down, it may suggest that
resource reduction is straining the system.
- Release or Deployment Events: A new version rollout or config change is
a frequent source of correlated issues. Compare the timing of the latency
increase to the deployment to see if the change directly impacts the system.
Correlate with alerts or SLO breaches on endpoints to understand the immediate
effects of the release.
3. SLO and Alert-Based Changes: SLO breaches and alerts can provide concrete
timestamps for when symptoms begin. For instance, a breach on error rates for a
specific service endpoint following a dependency's scale-down event suggests a
possible causal link. An alert indicating sustained latency increase in a
dependency that remains unresolved points to a high-priority area for deeper
investigation.
4. Dependency Health and Behavior:
- Related vs. Unrelated Dependencies: Similar to the latency example,
observe if multiple dependencies experience symptomatic changes simultaneously.
Related dependencies should show consistent, similar issues, while unrelated
dependencies may exhibit brief, unrelated spikes. Persistent issues across key
dependencies likely indicate a systemic cause, while isolated changes are less
likely to be relevant.
### Examples of Reasoning Through Changes
Consider these scenarios:
- Increase in Error Rates and a Recent Deployment: Suppose error rates for
an endpoint increase sharply post-deployment. If related logs show new error
patterns, this aligns the symptom with a deployment change. Investigate specific
changes in the deployment (e.g., code changes or resource allocation).
- Throughput Decrease and Scaling Events: If throughput dips shortly after a
scale-down event, it might suggest resource constraints. Analyze CPU or memory
throttling logs from this period in upstream dependencies to confirm.
- Cross-Service Latency Spikes: If multiple services along a call path
experience latency spikes, with CPU throttling logs, this suggests a resource
bottleneck. Trace logs and alerts related to autoscaling decisions may provide
insights into whether the system configuration caused cascading delays.
By carefully mapping these changes and analyzing their timing, you can
distinguish between causally related events and incidental changes, allowing for
a targeted and effective investigation.`;
export const RCA_PROMPT_CHANGE_POINTS = `## Change points
Change points can be defined as the following type:
- \`dip\`: a significant dip occurs at this change point
- \`distribution_change\`: the overall distribution of the values has changed
significantly
- \`non_stationary\`: there is no change point, but the values are not from a
stationary distribution
- \`spike\`: a significant spike occurs at this point
- \`stationary\`: no change point found
- \`step_change\`: the change indicates a statistically significant step up or
down in value distribution
- \`trend_change\`: there is an overall trend change occurring at this point
For \`spike\`, and \`dip\`, this means: a short-lived spike or dip that then again
stabilizes. For persisted changes, you'd see a \`step_change\` (if the values
before and after the change point are stable), or a \`trend_change\` when the
values show an upward or downward trend after the change.`;
export const RCA_PROMPT_SIGNIFICANT_EVENTS = `## Significant events
Generate a timeline of significant events. These events should capture
significant observed changes in the system that can be extracted from the
analyzed data. This timeline is absolutely critical to the investigation,
and close attention has to be paid to the data, and the instructions.
The timeline should focus on key events as captured in log patterns, including
both notable changes and unusual/critical messages. This data-driven timeline
should help establish a chain of causality, pinpointing when anomalies began,
what system behaviors were observed, and how these patterns relate to the overall incident.
- Use ISO timestamps to ensure precision and clarity.
- Include alerts that are part of the investigation. For these, use the start
time of the alert, and mention critical information about the alert, such as
reason and grouping fields.
- Focus on log entries that signal significant system behavior (e.g., errors,
retries, anomalies).
- Highlight critical log messages or changes in patterns that may correlate
with the issue.
- Include notable anomalies, such as spikes in error rates, unexpected system
responses, or any log entries suggesting failure or degradation.
Do not include:
- Events that are indicative of normal operations.
- Events that are unlikely to be related to the investigated issue.
Key Elements to Include:
- Log Patterns: Capture log messages that show unusual events or
abnormalities such as error codes, failed retries, or changes in log frequency.
- Timestamps: Ensure every entry in the timeline is time-stamped
with an accurate ISO 8601 timestamp.
- Event Description: Provide a clear, concise, and objective description of
what was observed in the logs.
- Corroborating Data: Link log anomalies to other relevant data points such
as traffic shifts, request patterns, or upstream/downstream service impacts.`;
export const RCA_PROMPT_TIMELINE_GUIDE = `
The timeline should focus on key events as
captured in log patterns, including both notable changes and unusual/critical
messages. This data-driven timeline should help establish a chain of causality,
pinpointing when anomalies began, what system behaviors were observed, and how
these patterns relate to the overall incident.
- **Use ISO timestamps** to ensure precision and clarity.
- **Focus on log entries** that signal significant system behavior (e.g.,
errors, retries, anomalies).
- **Highlight critical log messages** or changes in patterns that may correlate
with the issue.
- Include notable anomalies, such as spikes in error rates, unexpected
system responses, or any log entries suggesting failure or degradation.
Key Elements to Include:
Log Patterns: Capture log messages that show unusual events or
abnormalities such as error codes, failed retries, or changes in log frequency.
Timestamps: Ensure every entry in the timeline is time-stamped
with an accurate ISO 8601 timestamp.
Event Description: Provide a clear, concise description of what was
observed in the logs.
Corroborating Data: Link log anomalies to other relevant data points such
as traffic shifts, request patterns, or upstream/downstream service impacts.`;


@@ -0,0 +1,305 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { RulesClient } from '@kbn/alerting-plugin/server';
import { calculateAuto } from '@kbn/calculate-auto';
import { MessageRole, AssistantMessage, ToolMessage, ToolChoiceType } from '@kbn/inference-common';
import { InferenceClient } from '@kbn/inference-plugin/server';
import { Logger } from '@kbn/logging';
import { AlertsClient } from '@kbn/rule-registry-plugin/server';
import { findLast, pick } from 'lodash';
import moment from 'moment';
import { catchError, filter, from, map, mergeMap, Observable, of, switchMap } from 'rxjs';
import { ObservabilityAIAssistantClient } from '@kbn/observability-ai-assistant-plugin/server';
import { ObservabilityElasticsearchClient } from '@kbn/observability-utils-server/es/client/create_observability_es_client';
import {
RCA_END_PROCESS_TOOL_NAME,
RCA_INVESTIGATE_ENTITY_TOOL_NAME,
RCA_OBSERVE_TOOL_NAME,
} from '@kbn/observability-ai-common/root_cause_analysis';
import { callEndRcaProcessTool } from './call_end_rca_process_tool';
import { callInvestigateEntityTool } from './call_investigate_entity_tool';
import { callObserveTool } from './call_observe_tool';
import { RCA_PROMPT_CHANGES, RCA_PROMPT_ENTITIES, RCA_SYSTEM_PROMPT_BASE } from './prompts';
import { RCA_TOOLS } from './tools';
import {
EndProcessToolMessage,
InvestigateEntityToolMessage,
ObservationToolMessage,
RootCauseAnalysisContext,
RootCauseAnalysisEvent,
ToolErrorMessage,
} from './types';
import { callTools } from './util/call_tools';
import { formatEntity } from './util/format_entity';
import { validateInvestigateEntityToolCalls } from './util/validate_investigate_entity_tool_call';
const SYSTEM_PROMPT_WITH_OBSERVE_INSTRUCTIONS = `${RCA_SYSTEM_PROMPT_BASE}
Your next step is to request an observation from another agent based
on the initial context or the results of previous investigations.`;
const SYSTEM_PROMPT_WITH_DECISION_INSTRUCTIONS = `${RCA_SYSTEM_PROMPT_BASE}
${RCA_PROMPT_ENTITIES}
${RCA_PROMPT_CHANGES}
To determine whether to end the process or continue analyzing another entity,
follow the advice from the previous observation, and these tips:
Continuing the process:
- Do not investigate an entity twice. This will result in a failure.
- Logs, traces, or observability data that suggest upstream or downstream
issues (such as connection failures, timeouts, or authentication errors)
indicate further investigation is required.
Ending the process:
- No further entities to investigate: If there are no unexplored upstream or
downstream dependencies, and all related entities have been investigated without
discovering new anomalies, it may be appropriate to end the process.
- If all investigated entities (e.g., services, hosts, containers) are
functioning normally, with no relevant issues found, and there are no signs of
dependencies being affected, you may consider ending the process.
- Avoid concluding the investigation based solely on symptoms or the absence
of immediate errors in the data. Unless a system change has been connected to
the incident, it is important to continue investigating dependencies to ensure
the root cause has been accurately identified.`;
export function runRootCauseAnalysis({
serviceName,
start: requestedStart,
end: requestedEnd,
esClient,
alertsClient,
rulesClient,
observabilityAIAssistantClient,
spaceId,
indices,
connectorId,
inferenceClient,
context: initialContext,
logger: incomingLogger,
prevEvents,
}: {
context: string;
serviceName: string;
logger: Logger;
inferenceClient: InferenceClient;
start: number;
end: number;
alertsClient: AlertsClient;
rulesClient: RulesClient;
esClient: ObservabilityElasticsearchClient;
observabilityAIAssistantClient: ObservabilityAIAssistantClient;
indices: {
logs: string[];
traces: string[];
sloSummaries: string[];
};
connectorId: string;
spaceId: string;
prevEvents?: RootCauseAnalysisEvent[];
}): Observable<RootCauseAnalysisEvent> {
const logger = incomingLogger.get('rca');
const entity = { 'service.name': serviceName };
const bucketSize = calculateAuto
.atLeast(30, moment.duration(requestedEnd - requestedStart))!
.asMilliseconds();
const start = Math.floor(requestedStart / bucketSize) * bucketSize;
const end = Math.floor(requestedEnd / bucketSize) * bucketSize;
const initialMessage = {
role: MessageRole.User as const,
content: `Investigate the health status of ${formatEntity(entity)}.
The context given for this investigation is:
${initialContext}`,
};
const nextEvents = [initialMessage, ...(prevEvents ?? [])];
const initialRcaContext: RootCauseAnalysisContext = {
connectorId,
start,
end,
esClient,
events: nextEvents,
indices,
inferenceClient,
initialContext,
alertsClient,
observabilityAIAssistantClient,
logger,
rulesClient,
spaceId,
tokenLimit: 32_000,
};
const investigationTimeRangePrompt = `## Time range
The time range of the investigation is ${new Date(start).toISOString()} until ${new Date(
end
).toISOString()}`;
initialContext = `${initialContext}
${investigationTimeRangePrompt}
`;
const next$ = callTools(
{
system: RCA_SYSTEM_PROMPT_BASE,
connectorId,
inferenceClient,
messages: nextEvents,
logger,
},
({ messages }) => {
const lastSuccessfulToolResponse = findLast(
messages,
(message) => message.role === MessageRole.Tool && message.name !== 'error'
) as Exclude<ToolMessage, ToolErrorMessage> | undefined;
const shouldWriteObservationNext =
!lastSuccessfulToolResponse || lastSuccessfulToolResponse.name !== RCA_OBSERVE_TOOL_NAME;
const nextTools = shouldWriteObservationNext
? pick(RCA_TOOLS, RCA_OBSERVE_TOOL_NAME)
: pick(RCA_TOOLS, RCA_END_PROCESS_TOOL_NAME, RCA_INVESTIGATE_ENTITY_TOOL_NAME);
const nextSystem = shouldWriteObservationNext
? SYSTEM_PROMPT_WITH_OBSERVE_INSTRUCTIONS
: SYSTEM_PROMPT_WITH_DECISION_INSTRUCTIONS;
return {
messages,
system: `${nextSystem}
${investigationTimeRangePrompt}`,
tools: nextTools,
toolChoice: shouldWriteObservationNext
? { function: RCA_OBSERVE_TOOL_NAME }
: ToolChoiceType.required,
};
},
({
toolCalls,
messages,
}): Observable<
| ObservationToolMessage
| ToolErrorMessage
| InvestigateEntityToolMessage
| EndProcessToolMessage
| AssistantMessage
> => {
const nextRcaContext = {
...initialRcaContext,
events: messages as RootCauseAnalysisEvent[],
};
return of(undefined).pipe(
switchMap(() => {
return from(
validateInvestigateEntityToolCalls({ rcaContext: nextRcaContext, toolCalls })
);
}),
switchMap((errors) => {
if (errors.length) {
return of(
...toolCalls.map((toolCall) => {
const toolCallErrorMessage: ToolErrorMessage = {
role: MessageRole.Tool,
name: 'error',
response: {
error: {
message: `Some ${RCA_INVESTIGATE_ENTITY_TOOL_NAME} calls were not valid:
${errors.map((error) => `- ${error}`).join('\n')}`,
},
},
toolCallId: toolCall.toolCallId,
};
return toolCallErrorMessage;
})
);
}
return of(...toolCalls).pipe(
mergeMap((toolCall) => {
function executeToolCall(): Observable<
| EndProcessToolMessage
| InvestigateEntityToolMessage
| ObservationToolMessage
| ToolErrorMessage
| AssistantMessage
> {
switch (toolCall.function.name) {
case RCA_END_PROCESS_TOOL_NAME:
return callEndRcaProcessTool({
rcaContext: nextRcaContext,
toolCallId: toolCall.toolCallId,
});
case RCA_INVESTIGATE_ENTITY_TOOL_NAME:
return callInvestigateEntityTool({
context: toolCall.function.arguments.context,
field: toolCall.function.arguments.entity.field,
value: toolCall.function.arguments.entity.value,
rcaContext: nextRcaContext,
toolCallId: toolCall.toolCallId,
});
case RCA_OBSERVE_TOOL_NAME:
return callObserveTool({
rcaContext: nextRcaContext,
toolCallId: toolCall.toolCallId,
});
}
}
return executeToolCall().pipe(
catchError((error) => {
logger.error(`Failed executing task: ${error.message}`);
logger.error(error);
const toolErrorMessage: ToolErrorMessage = {
name: 'error',
role: MessageRole.Tool,
response: {
error: {
...('toJSON' in error && typeof error.toJSON === 'function'
? error.toJSON()
: {}),
message: error.message,
},
},
toolCallId: toolCall.toolCallId,
};
return of(toolErrorMessage);
})
);
}, 3)
);
})
);
}
);
return next$.pipe(
filter((event) =>
Boolean(event.role !== MessageRole.Assistant || event.content || event.toolCalls?.length)
),
map((event) => {
if (event.role === MessageRole.Assistant) {
return event as Extract<RootCauseAnalysisEvent, AssistantMessage>;
}
return event;
})
);
}


@@ -0,0 +1,402 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { getEntityKuery } from '@kbn/observability-utils-common/entities/get_entity_kuery';
import { formatValueForKql } from '@kbn/observability-utils-common/es/format_value_for_kql';
import type { TruncatedDocumentAnalysis } from '@kbn/observability-utils-common/llm/log_analysis/document_analysis';
import { ShortIdTable } from '@kbn/observability-utils-common/llm/short_id_table';
import {
P_VALUE_SIGNIFICANCE_HIGH,
P_VALUE_SIGNIFICANCE_MEDIUM,
} from '@kbn/observability-utils-common/ml/p_value_to_label';
import {
FieldPatternResultWithChanges,
getLogPatterns,
} from '@kbn/observability-utils-server/entities/get_log_patterns';
import { castArray, compact, groupBy, orderBy } from 'lodash';
import { RCA_PROMPT_CHANGES, RCA_PROMPT_ENTITIES } from '../../prompts';
import { RootCauseAnalysisContext } from '../../types';
import { formatEntity } from '../../util/format_entity';
import { serializeKnowledgeBaseEntries } from '../../util/serialize_knowledge_base_entries';
import { ScoredKnowledgeBaseEntry } from '../get_knowledge_base_entries';
type LogPatternRelevance = 'normal' | 'unusual' | 'warning' | 'critical';
export type AnalyzedLogPattern = FieldPatternResultWithChanges & {
relevance: LogPatternRelevance;
interesting: boolean;
};
export interface AnalyzeLogPatternOutput {
ownPatterns: AnalyzedLogPattern[];
patternsFromOtherEntities: AnalyzedLogPattern[];
}
const normalDescription = `normal operations, such as access logs`;
const unusualDescription = `something unusual and/or
rare, such as startup or shutdown messages or
other rare events`;
const warningDescription = `something being in an unexpected state,
such as error messages, rate limiting or disk usage warnings`;
const criticalDescription = `something being in a critical state,
such as startup failure messages, out-of-memory errors or crashloopbackoff
events`;
interface LogPatternCutOff {
significance?: 'high' | 'medium' | 'low';
pValue?: number;
}
export async function analyzeLogPatterns({
entity,
allAnalysis,
system,
rcaContext: { logger: parentLogger, inferenceClient, connectorId, esClient, start, end, indices },
cutoff,
kbEntries,
}: {
entity: Record<string, string>;
allAnalysis: Array<{ index: string | string[]; analysis: TruncatedDocumentAnalysis }>;
system: string;
cutoff?: LogPatternCutOff;
kbEntries: ScoredKnowledgeBaseEntry[];
rcaContext: Pick<
RootCauseAnalysisContext,
'indices' | 'logger' | 'inferenceClient' | 'connectorId' | 'esClient' | 'start' | 'end'
>;
}): Promise<AnalyzeLogPatternOutput> {
const kuery = getEntityKuery(entity);
const logger = parentLogger.get('analyzeLogPatterns');
const fields = ['message', 'error.exception.message'];
logger.debug(() => `Analyzing log patterns for ${JSON.stringify(entity)}`);
const systemPrompt = `You are a helpful assistant for Elastic Observability.
You are an expert in analyzing log messages for software
systems, and you use your extensive experience as an SRE
to thoroughly analyze log patterns for things that require
attention from the user.
${RCA_PROMPT_CHANGES}
${RCA_PROMPT_ENTITIES}
## Entity
The following entity is being analyzed:
${formatEntity(entity)}
${serializeKnowledgeBaseEntries(kbEntries)}
### Entity analysis
${allAnalysis.map(({ index: analyzedIndex, analysis }) => {
return `#### Indices: ${castArray(analyzedIndex).join(',')}
${JSON.stringify(analysis)}`;
})}
${system}`;
const kueryForOtherEntities = `NOT (${kuery}) AND ${Object.values(entity)
.map(
(val) =>
`(${fields.map((field) => `(${[field, formatValueForKql(val)].join(':')})`).join(' OR ')})`
)
.join(' AND ')}`;
const [logPatternsFromEntity, logPatternsFromElsewhere] = await Promise.all([
getLogPatterns({
esClient,
index: [...indices.logs, ...indices.traces],
start,
end,
kuery,
includeChanges: true,
fields,
metadata: [],
}),
getLogPatterns({
esClient,
index: [...indices.logs],
start,
end,
kuery: kueryForOtherEntities,
metadata: Object.keys(entity),
includeChanges: true,
fields,
}),
]);
const patternIdLookupTable = new ShortIdTable();
logger.debug(
() =>
`Found ${logPatternsFromEntity.length} own log patterns and ${logPatternsFromElsewhere.length} from others`
);
logger.trace(
() =>
`Found log patterns${JSON.stringify({
entity,
logPatternsFromEntity,
logPatternsFromElsewhere,
})}`
);
const patternsWithIds = [...logPatternsFromEntity, ...logPatternsFromElsewhere].map((pattern) => {
return {
...pattern,
shortId: patternIdLookupTable.take(pattern.regex),
};
});
const patternsByRegex = new Map(patternsWithIds.map((pattern) => [pattern.regex, pattern]));
const serializedOwnEntity = formatEntity(entity);
const [ownPatterns, patternsFromOtherEntities] = await Promise.all([
logPatternsFromEntity.length ? categorizeOwnPatterns() : [],
logPatternsFromElsewhere.length ? selectRelevantPatternsFromOtherEntities() : [],
]);
logger.trace(
() =>
`Classified log patterns ${JSON.stringify([entity, ownPatterns, patternsFromOtherEntities])}`
);
const allPatterns = [...ownPatterns, ...patternsFromOtherEntities];
const sortedByPValueAsc = orderBy(
allPatterns.filter((pattern) => pattern.change && pattern.change.p_value),
(pattern) => {
return pattern.change.p_value;
},
'asc'
);
const pValueCutOff = getPValueCutoff({ cutoff, max: sortedByPValueAsc[0]?.change.p_value });
return {
ownPatterns: ownPatterns.map((pattern) => ({
...pattern,
interesting: isInterestingPattern(pattern, pValueCutOff),
})),
patternsFromOtherEntities: patternsFromOtherEntities.map((pattern) => ({
...pattern,
interesting: isInterestingPattern(pattern, pValueCutOff),
})),
};
function categorizeOwnPatterns() {
return inferenceClient
.output({
id: 'analyze_log_patterns',
connectorId,
system: systemPrompt,
input: `Based on the following log patterns from
${formatEntity(entity)}, group these patterns into
the following categories:
- normal (patterns that are indicative of ${normalDescription})
- unusual (patterns that are indicative of ${unusualDescription})
- warning (patterns that are indicative of ${warningDescription})
- critical (patterns that are indicative of ${criticalDescription})
## Log patterns:
${preparePatternsForLlm(logPatternsFromEntity)}
`,
schema: {
type: 'object',
properties: {
categories: {
type: 'array',
items: {
type: 'object',
properties: {
relevance: {
type: 'string',
enum: ['normal', 'unusual', 'warning', 'critical'],
},
shortIds: {
type: 'array',
description:
'The pattern IDs you want to group here. Use the pattern short ID.',
items: {
type: 'string',
},
},
},
required: ['relevance', 'shortIds'],
},
},
},
required: ['categories'],
} as const,
})
.then((outputEvent) => {
return outputEvent.output.categories.flatMap((category) => {
return mapIdsBackToPatterns(category.shortIds).map((pattern) => {
return {
...pattern,
relevance: category.relevance,
};
});
});
});
}
function selectRelevantPatternsFromOtherEntities() {
return inferenceClient
.output({
id: 'select_relevant_patterns_from_other_entities',
connectorId,
system: systemPrompt,
input: `Based on the following log patterns that
are NOT from ${serializedOwnEntity}, group these
patterns into the following categories:
- irrelevant (patterns that are not relevant for
${serializedOwnEntity})
- normal (patterns that relevant for
${serializedOwnEntity} and are indicative of ${normalDescription})
- unusual (patterns that are relevant for
${serializedOwnEntity} and are indicative of ${unusualDescription})
- warning (patterns that are relevant for
${serializedOwnEntity} and are indicative of ${warningDescription})
- critical (patterns that are relevant for
${serializedOwnEntity} and are indicative of ${criticalDescription})
Relevant patterns are messages that mention the
investigated entity, or things that are indicative
of critical failures or changes in the entity
that owns the log pattern.
## Log patterns:
${preparePatternsForLlm(logPatternsFromElsewhere)}
`,
schema: {
type: 'object',
properties: {
categories: {
type: 'array',
items: {
type: 'object',
properties: {
relevance: {
type: 'string',
enum: ['irrelevant', 'normal', 'unusual', 'warning', 'critical'],
},
shortIds: {
type: 'array',
description:
'The pattern IDs you want to group here. Use the pattern short ID.',
items: {
type: 'string',
},
},
},
required: ['relevance', 'shortIds'],
},
},
},
required: ['categories'],
} as const,
})
.then((outputEvent) => {
return outputEvent.output.categories.flatMap((category) => {
return mapIdsBackToPatterns(category.shortIds).flatMap((pattern) => {
if (category.relevance === 'irrelevant') {
return [];
}
return [
{
...pattern,
relevance: category.relevance,
},
];
});
});
});
}
function preparePatternsForLlm(patterns: FieldPatternResultWithChanges[]): string {
const groupedByField = groupBy(patterns, (pattern) => pattern.field);
return Object.entries(groupedByField)
.map(([field, patternsForField]) => {
return `### \`${field}\`
#### Patterns
${JSON.stringify(
patternsForField.map((pattern) => {
return {
shortId: patternIdLookupTable.take(pattern.regex),
regex: pattern.regex,
sample: pattern.sample,
highlight: pattern.highlight,
change: pattern.change,
};
})
)}
`;
})
.join('\n\n');
}
function mapIdsBackToPatterns(ids?: string[]) {
return compact(
ids?.map((shortId) => {
const lookupId = patternIdLookupTable.lookup(shortId);
if (!lookupId) {
return undefined;
}
const pattern = patternsByRegex.get(lookupId);
return pattern;
})
);
}
}
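The short-ID round trip above is the contract between this code and the model: each pattern is keyed by its regex, handed to the LLM under a compact ID, and mapped back through `patternsByRegex` afterwards. A minimal sketch of that flow, assuming `ShortIdTable` behaves as it is used here (`take` hands out a short ID for a value, `lookup` reverses it):

```ts
// Sketch only; assumes the ShortIdTable semantics implied by the code above.
const table = new ShortIdTable();

const regex = '^Connection refused to .*$';
const shortId = table.take(regex); // compact ID embedded in the LLM prompt

// ...the model groups patterns and returns short IDs in its structured output...

const original = table.lookup(shortId); // -> the regex, or undefined if unknown
// patternsByRegex.get(original!) then recovers the full pattern object, which
// is exactly what mapIdsBackToPatterns() does while dropping unknown IDs.
```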
function isInterestingPattern(
pattern: Omit<AnalyzedLogPattern, 'interesting'>,
pValueCutOff: number
) {
return (pattern.change.p_value ?? 1) <= pValueCutOff || pattern.relevance !== 'normal';
}
function getPValueCutoff({ max, cutoff }: { max?: number; cutoff?: LogPatternCutOff }) {
if (cutoff?.pValue) {
return cutoff?.pValue;
}
if (cutoff?.significance === 'high') {
return P_VALUE_SIGNIFICANCE_HIGH;
}
if (cutoff?.significance === 'medium') {
return P_VALUE_SIGNIFICANCE_MEDIUM;
}
if (max === undefined) {
return Number.MAX_VALUE;
}
if (max <= P_VALUE_SIGNIFICANCE_HIGH) {
return P_VALUE_SIGNIFICANCE_HIGH;
}
if (max <= P_VALUE_SIGNIFICANCE_MEDIUM) {
return P_VALUE_SIGNIFICANCE_MEDIUM;
}
return Number.MAX_VALUE;
}
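A quick sketch of how the cutoff tiers above resolve. The numeric values of the significance constants are assumptions, chosen only to make the tiers concrete:

```ts
// Assumed for illustration: P_VALUE_SIGNIFICANCE_HIGH = 1e-6,
// P_VALUE_SIGNIFICANCE_MEDIUM = 1e-3 (the real constants live elsewhere).
getPValueCutoff({ cutoff: { pValue: 0.01 } }); // -> 0.01 (an explicit cutoff wins)
getPValueCutoff({ cutoff: { significance: 'high' } }); // -> 1e-6
getPValueCutoff({ max: 5e-7 }); // -> 1e-6 (the most significant pattern fits the high tier)
getPValueCutoff({ max: 5e-4 }); // -> 1e-3 (falls through to the medium tier)
getPValueCutoff({}); // -> Number.MAX_VALUE (no p-value filtering at all)
```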


@@ -0,0 +1,74 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { InferenceClient } from '@kbn/inference-plugin/server';
import { TruncatedDocumentAnalysis } from '@kbn/observability-utils-common/llm/log_analysis/document_analysis';
import { FieldPatternResultWithChanges } from '@kbn/observability-utils-server/entities/get_log_patterns';
import { RCA_SYSTEM_PROMPT_BASE } from '../../prompts';
import { formatEntity } from '../../util/format_entity';
import { serializeKnowledgeBaseEntries } from '../../util/serialize_knowledge_base_entries';
import { ScoredKnowledgeBaseEntry } from '../get_knowledge_base_entries';
import { getInvestigateEntityTaskPrompt } from '../investigate_entity/prompts';
export async function describeEntity({
inferenceClient,
connectorId,
entity,
contextForEntityInvestigation,
analysis,
ownPatterns,
kbEntries,
}: {
inferenceClient: InferenceClient;
connectorId: string;
entity: Record<string, string>;
analysis: TruncatedDocumentAnalysis;
contextForEntityInvestigation: string;
ownPatterns: FieldPatternResultWithChanges[];
kbEntries: ScoredKnowledgeBaseEntry[];
}) {
const system = RCA_SYSTEM_PROMPT_BASE;
const input = `${getInvestigateEntityTaskPrompt({ entity, contextForEntityInvestigation })}
## Context for investigating ${formatEntity(entity)}
${contextForEntityInvestigation}
${serializeKnowledgeBaseEntries(kbEntries)}
## Data samples
${JSON.stringify(analysis)}
## Log patterns
${JSON.stringify(ownPatterns.map(({ regex, sample }) => ({ regex, sample })))}
## Current task
Describe the entity characteristics based on the sample documents and log
patterns. Put it in the context of the investigation process. Mention the reason
why it's being investigated, and how it is related to other entities that were
previously investigated. Mention these three things:
- infrastructure & environment
- communication characteristics (protocols and endpoints)
- context of entity in investigation
You shouldn't mention the log patterns; they will be analyzed elsewhere.
`;
const response = await inferenceClient.output({
id: 'describe_entity',
connectorId,
system,
input,
});
return response.content;
}


@@ -0,0 +1,189 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { InferenceClient } from '@kbn/inference-plugin/server';
import { TruncatedDocumentAnalysis } from '@kbn/observability-utils-common/llm/log_analysis/document_analysis';
import { omit, partition, sumBy } from 'lodash';
import { RCA_SYSTEM_PROMPT_BASE } from '../../prompts';
import { formatEntity } from '../../util/format_entity';
import { serializeKnowledgeBaseEntries } from '../../util/serialize_knowledge_base_entries';
import { AnalyzedLogPattern } from '../analyze_log_patterns';
import { ScoredKnowledgeBaseEntry } from '../get_knowledge_base_entries';
import { getInvestigateEntityTaskPrompt } from '../investigate_entity/prompts';
export interface LogPatternDescription {
content: string;
docCount: number;
interestingPatternCount: number;
ignoredPatternCount: number;
ignoredDocCount: number;
}
export async function describeLogPatterns({
inferenceClient,
connectorId,
entity,
contextForEntityInvestigation,
analysis,
ownPatterns: allOwnPatterns,
patternsFromOtherEntities,
kbEntries,
}: {
inferenceClient: InferenceClient;
connectorId: string;
entity: Record<string, string>;
analysis: TruncatedDocumentAnalysis;
contextForEntityInvestigation: string;
ownPatterns: AnalyzedLogPattern[];
patternsFromOtherEntities: AnalyzedLogPattern[];
kbEntries: ScoredKnowledgeBaseEntry[];
}): Promise<LogPatternDescription> {
const system = RCA_SYSTEM_PROMPT_BASE;
const [ownInterestingPatterns, ignoredOwnPatterns] = partition(
allOwnPatterns,
(pattern) => pattern.interesting
);
const stats = {
docCount: sumBy(allOwnPatterns, (pattern) => pattern.count),
interestingPatternCount: ownInterestingPatterns.length,
otherInterestingPatternCount: patternsFromOtherEntities.length,
ignoredPatternCount: ignoredOwnPatterns.length,
ignoredDocCount: sumBy(ignoredOwnPatterns, (pattern) => pattern.count),
};
const header = `## Log analysis
### Stats for own log patterns:
- ${stats.docCount} documents analyzed
- ${stats.interestingPatternCount} interesting patterns
- ${stats.ignoredPatternCount} ignored patterns, accounting for
${stats.ignoredDocCount} out of ${stats.docCount} documents
- ${stats.otherInterestingPatternCount} relevant patterns from
other entities`;
if (!stats.interestingPatternCount && !stats.otherInterestingPatternCount) {
return {
...stats,
content: `${header}\n\nNo interesting log patterns`,
};
}
const ownLogPatternsPrompt = ownInterestingPatterns.length
? JSON.stringify(
ownInterestingPatterns.map(({ regex, sample, change, count, timeseries }) => ({
regex,
sample,
change,
count,
timeseries: timeseries.map(({ x, y }, index) => {
if (index === change.change_point) {
return `${change.type} at ${new Date(x).toISOString()}: ${y}`;
}
return `${new Date(x).toISOString()}: ${y}`;
}),
}))
)
: 'No own log patterns found';
const otherLogPatternsPrompt = patternsFromOtherEntities.length
? JSON.stringify(
patternsFromOtherEntities.map(
({ regex, sample, change, count, timeseries, metadata, field, highlight }) => ({
regex,
sample,
change,
count,
timeseries: timeseries.map(({ x, y }, index) => {
if (index === change.change_point) {
return `${change.type} at ${new Date(x).toISOString()}: ${y}`;
}
return `${new Date(x).toISOString()}: ${y}`;
}),
entity: omit(metadata, field),
highlight,
})
)
)
: 'No relevant log patterns found from other entities';
const input = `${getInvestigateEntityTaskPrompt({ entity, contextForEntityInvestigation })}
## Context for investigating ${formatEntity(entity)}
${contextForEntityInvestigation}
${serializeKnowledgeBaseEntries(kbEntries)}
## Data samples
${JSON.stringify(analysis)}
## Log patterns from ${formatEntity(entity)}
${ownLogPatternsPrompt}
## Possibly relevant log patterns from other entities
${otherLogPatternsPrompt}
### Interpreting log patterns and samples
The pattern itself is what is consistent across all messages. The values from these parts
are separately given in "constants". There's also a single (random) _sample_ included, with
the variable part being given as well. E.g., if the failure in the sample is not part of the pattern
itself, you should mention that in your analysis.
## Task
Using only the log patterns, describe your observations about the entity.
Group these patterns together based on topic. Some examples of these topics:
- normal operations such as request logs
- connection issues to an upstream dependency
- startup messages
- garbage collection messages
For patterns with change points, describe the trend before and after the change point based
on the data points. E.g.:
- A persisted drop to near-zero after 2020-01-01T05:00:00.000Z
- A spike from 10 to 100 at 2020-01-01T05:00:00.000Z, which went back down
to the average after 2020-01-01T05:02:00.000Z
- A trend change after 2020-01-01T05:00:00.000Z. The values ranged from 10
to 20 before, but then after increased from 20 to 100 until
2020-01-01T05:02:00.000Z.
Do not:
- repeat the variables; instead, repeat the constants.
- repeat the timeseries as a whole, verbatim, in full. However, you can use individual data points + timestamps to illustrate the magnitude of the change, as in the example previously given.
- make up timestamps.
- separately list individual events if you have already mentioned
the pattern.
Statistics:
- ${stats.interestingPatternCount} patterns from ${formatEntity(entity)}
were collected
- ${stats.docCount} logs were categorized
- ${stats.ignoredPatternCount} patterns were deemed uninteresting and accounted
for ${stats.ignoredDocCount} out of the total amount of logs
`;
const response = await inferenceClient.output({
id: 'describe_log_patterns',
connectorId,
system,
input,
});
return {
...stats,
content: response.content,
};
}
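Both prompts above serialize each pattern's time series as one ISO-timestamp line per bucket, annotating the change point inline so the model can anchor its trend description. A toy rendering of that mapping, with made-up data:

```ts
// Made-up data showing what the timeseries.map(...) calls above produce.
const change = { type: 'dip', change_point: 1 };
const timeseries = [
  { x: 1577854800000, y: 120 },
  { x: 1577854860000, y: 3 },
  { x: 1577854920000, y: 2 },
];

const lines = timeseries.map(({ x, y }, index) =>
  index === change.change_point
    ? `${change.type} at ${new Date(x).toISOString()}: ${y}`
    : `${new Date(x).toISOString()}: ${y}`
);
// -> [ '2020-01-01T05:00:00.000Z: 120',
//      'dip at 2020-01-01T05:01:00.000Z: 3',
//      '2020-01-01T05:02:00.000Z: 2' ]
```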


@@ -0,0 +1,438 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { InferenceClient } from '@kbn/inference-plugin/server';
import { Logger } from '@kbn/logging';
import { getEntityKuery } from '@kbn/observability-utils-common/entities/get_entity_kuery';
import {
DocumentAnalysis,
TruncatedDocumentAnalysis,
} from '@kbn/observability-utils-common/llm/log_analysis/document_analysis';
import { sortAndTruncateAnalyzedFields } from '@kbn/observability-utils-common/llm/log_analysis/sort_and_truncate_analyzed_fields';
import { analyzeDocuments } from '@kbn/observability-utils-server/entities/analyze_documents';
import { FieldPatternResultWithChanges } from '@kbn/observability-utils-server/entities/get_log_patterns';
import { ObservabilityElasticsearchClient } from '@kbn/observability-utils-server/es/client/create_observability_es_client';
import { kqlQuery } from '@kbn/observability-utils-server/es/queries/kql_query';
import { rangeQuery } from '@kbn/observability-utils-server/es/queries/range_query';
import { chunk, isEmpty, isEqual } from 'lodash';
import pLimit from 'p-limit';
import {
RCA_PROMPT_DEPENDENCIES,
RCA_PROMPT_ENTITIES,
RCA_SYSTEM_PROMPT_BASE,
} from '../../prompts';
import { chunkOutputCalls } from '../../util/chunk_output_calls';
import { formatEntity } from '../../util/format_entity';
import { serializeKnowledgeBaseEntries } from '../../util/serialize_knowledge_base_entries';
import { toBlockquote } from '../../util/to_blockquote';
import { ScoredKnowledgeBaseEntry } from '../get_knowledge_base_entries';
import { RelatedEntityKeywordSearch } from './write_keyword_searches_for_related_entities';
export interface RelatedEntityFromSearchResults {
entity: { [x: string]: string };
highlight: Record<string, string[]>;
analysis: TruncatedDocumentAnalysis;
}
function getPromptForFoundEntity({ entity, analysis, highlight }: RelatedEntityFromSearchResults) {
return `## Entity: ${formatEntity(entity)}
${toBlockquote(`### Search highlights for ${formatEntity(entity)}
${JSON.stringify(highlight)}`)}
`;
}
function getInputPromptBase({
entity,
analysis,
ownPatterns,
patternsFromOtherEntities,
searches,
context,
kbEntries,
}: {
entity: Record<string, string>;
analysis: TruncatedDocumentAnalysis;
ownPatterns: FieldPatternResultWithChanges[];
patternsFromOtherEntities: FieldPatternResultWithChanges[];
searches: RelatedEntityKeywordSearch[];
context: string;
kbEntries: ScoredKnowledgeBaseEntry[];
}) {
const otherPatternsPrompt = patternsFromOtherEntities.length
? JSON.stringify(
patternsFromOtherEntities.map((pattern) => ({
sample: pattern.sample,
regex: pattern.regex,
}))
)
: 'No relevant log patterns from other entities found';
const logPatternsPrompt = ownPatterns.length
? JSON.stringify(
ownPatterns.map((pattern) => {
return { sample: pattern.sample, regex: pattern.regex };
})
)
: 'No log patterns found';
return `Describe possible relationships to the investigated entity ${formatEntity(entity)}.
## Context
${toBlockquote(context)}
${serializeKnowledgeBaseEntries(kbEntries)}
## Data analysis
${JSON.stringify(analysis)}
## Log patterns for ${formatEntity(entity)}
${logPatternsPrompt}
## Patterns from other entities
${otherPatternsPrompt}
## Search keywords
${searches
.map(({ fragments, appearsAs }) => {
return `## Appears as: ${appearsAs}
### Fragments:
${fragments.map((fragment) => `- \`${fragment}\``).join('\n')}`;
})
.join('\n')}`;
}
function getInputPromptInstructions({ entity }: { entity: Record<string, any> }) {
return `### Indicator strength
In an Observability system, indicators of relationships between entities like
services, hosts, users, or requests can vary in strength. Some indicators
clearly define relationships, while others only suggest correlations. Here's a
breakdown of these indicators into strong, average, and weak categories, with an
additional look at how weak indicators can become strong when combined.
Strong indicators provide definitive links between entities. Distributed tracing
IDs (trace, span, and parent) are among the strongest indicators, as they map
the complete request path across services, showing exact service interactions.
Session or user IDs are also strong indicators, capturing a user's actions
across services or hosts and revealing issues specific to particular users.
Average indicators give helpful context but may require supporting data to
clarify relationships. IP addresses, for instance, are moderately strong for
tracking inter-service calls within controlled environments but are weaker
across public or shared networks where IP reuse is common. URL paths also fall
in this category; they link entities to specific endpoints or service functions
and are moderately strong for tracking interactions between microservices with
known APIs. Port numbers are another average indicator. While they suggest the
service interaction type (HTTP, database), they generally need pairing with IP
addresses or URLs for more accuracy, as port numbers alone are often shared
across different services.
Weak indicators are often too generic to imply a direct relationship but can
suggest possible correlations. Host names, for example, are broad and typically
cover a range of services or applications, especially in large clusters.
Time-based indicators, such as timestamps or TTL values, suggest possible timing
correlations but don't establish a definitive link on their own. Status codes,
like HTTP 500 errors, indicate issues but don't specify causality, often
requiring corroboration with stronger indicators like trace or session IDs.
However, weak indicators can become strong when they appear together. For
instance, a combination of IP address, port, and timestamp can strongly suggest
a direct interaction between services, especially when the same combination is
seen repeatedly or in conjunction with related URLs. Similarly, a host name
combined with a unique URL path can strongly suggest that a specific service or
pod is generating particular request patterns, even if each alone is too
general.
## Relevance to the investigation
Given the context of the investigation, some entities might be very relevant
even if there is no strong evidence of them being a direct dependency of
${formatEntity(entity)}. For instance, the related entity might be an
orchestrating entity, or it might be involved in a specific operation related
to the ongoing issue.
## Identifying entity relationships
Your current task is to identify possible entity relationships for the
investigated entity ${formatEntity(entity)}. You will get some context, document
analysis for the investigated entity, and results from keyword searches that were
extracted from the entity. Based on this data, list entities that could possibly
be related to the given entity and/or the initial context. List the highly
relevant entities first.
## Output
For each possible relationship, describe the following things:
- The related entity (as a key-value pair)
- The indicators you have observed as evidence of the relationship. Include the
strength of the indicator, and the exact pieces of data that are related to it
(field names and values, in both the investigated entity, and the possibly
related entity).
- Reason how the related entity is related to both ${formatEntity(entity)} as a
dependency and the context. For instance, describe who is the caller and callee
or whether that is unclear, based on the data, or explain how it might be
related to the context.
- The overall likelihood of it being a relevant entity.`;
}
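As a toy illustration of the reasoning this prompt asks the model to apply (not code the pipeline runs): individually weak or average indicators upgrade to a strong signal once enough of them co-occur.

```ts
// Illustrative heuristic only; it mirrors the prose above, nothing more.
type IndicatorStrength = 'weak' | 'average' | 'strong';

function combinedStrength(observed: IndicatorStrength[]): IndicatorStrength {
  if (observed.includes('strong')) return 'strong'; // e.g. a shared trace ID
  const averages = observed.filter((s) => s === 'average').length;
  // e.g. IP address + port + matching timestamp seen together
  if (averages >= 2 || observed.length >= 3) return 'strong';
  return averages === 1 ? 'average' : 'weak';
}

combinedStrength(['average', 'average', 'weak']); // -> 'strong'
combinedStrength(['weak']); // -> 'weak'
```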
export async function analyzeFetchedRelatedEntities({
connectorId,
inferenceClient,
esClient,
start,
end,
searches,
groupingFields,
index,
entity,
ownPatterns,
analysis,
patternsFromOtherEntities,
logger: parentLogger,
context,
kbEntries,
}: {
connectorId: string;
inferenceClient: InferenceClient;
esClient: ObservabilityElasticsearchClient;
start: number;
end: number;
searches: RelatedEntityKeywordSearch[];
groupingFields: string[];
index: string | string[];
entity: Record<string, string>;
analysis: {
truncated: TruncatedDocumentAnalysis;
full: DocumentAnalysis;
};
ownPatterns: FieldPatternResultWithChanges[];
patternsFromOtherEntities: FieldPatternResultWithChanges[];
context: string;
logger: Logger;
kbEntries: ScoredKnowledgeBaseEntry[];
}): Promise<{
summaries: string[];
foundEntities: RelatedEntityFromSearchResults[];
}> {
const entityFields = Object.keys(entity);
const logger = parentLogger.get('findRelatedEntities');
logger.debug(
() => `Finding related entities: ${JSON.stringify({ entity, groupingFields, searches })}`
);
const allValuesFromEntity = Array.from(
new Set(analysis.full.fields.flatMap((field) => field.values))
);
const foundEntities = (
await Promise.all(
groupingFields.map((groupingField) => getResultsForGroupingField(groupingField))
)
).flat();
logger.debug(() => `Found ${foundEntities.length} entities via keyword searches`);
const system = `${RCA_SYSTEM_PROMPT_BASE}
${RCA_PROMPT_ENTITIES}
${RCA_PROMPT_DEPENDENCIES}`;
const inputPromptBase = getInputPromptBase({
entity,
analysis: analysis.truncated,
ownPatterns,
patternsFromOtherEntities,
searches,
context,
kbEntries,
});
const foundEntityPrompts = foundEntities.map((foundEntity) => {
return {
text: getPromptForFoundEntity(foundEntity),
id: formatEntity(foundEntity.entity),
};
});
const inputPromptInstructions = getInputPromptInstructions({ entity });
// don't do more than 10 entities in a response, or we'll run out of
// tokens
const requests = chunk(foundEntityPrompts, 10).flatMap((texts) =>
chunkOutputCalls({
system,
input: `${inputPromptBase} ${inputPromptInstructions}`,
texts,
tokenLimit: 32_000 - 6_000,
})
);
const allRelevantEntityDescriptions = await Promise.all(
requests.map(async (request) => {
const outputCompleteEvent = await inferenceClient.output({
id: 'describe_relevant_entities',
connectorId,
system: request.system,
input: `${inputPromptBase}
# Found entities
${request.texts.map((text) => text.text).join('\n\n')}
${inputPromptInstructions}`,
});
return outputCompleteEvent.content;
})
);
return {
summaries: allRelevantEntityDescriptions,
foundEntities,
};
async function getResultsForGroupingField(
groupingField: string
): Promise<RelatedEntityFromSearchResults[]> {
const excludeQuery = isEqual([groupingField], entityFields)
? `NOT (${groupingField}:"${entity[groupingField]}")`
: ``;
const fieldCaps = await esClient.fieldCaps('check_if_grouping_field_exists', {
fields: [groupingField],
index,
index_filter: {
bool: {
filter: [...rangeQuery(start, end)],
},
},
});
if (isEmpty(fieldCaps.fields[groupingField])) {
return [];
}
const keywordSearchResults = await esClient.search(
'find_related_entities_via_keyword_searches',
{
track_total_hits: false,
index,
query: {
bool: {
must: [...rangeQuery(start, end), ...kqlQuery(excludeQuery)],
should: [
{
multi_match: {
query: searches.flatMap((search) => search.fragments).join(' '),
fields: '*',
},
},
],
minimum_should_match: 1,
},
},
fields: [groupingField],
collapse: {
field: groupingField,
},
highlight: {
fields: {
'*': {},
},
},
_source: false,
size: 1_000,
}
);
if (!keywordSearchResults.hits.hits.length) {
logger.debug(() => `No hits: ${JSON.stringify({ entity, groupingField, searches })}`);
return [];
}
logger.trace(
() =>
`Hits: ${JSON.stringify({
entity,
groupingField,
searches,
count: keywordSearchResults.hits.hits.length,
hits: keywordSearchResults.hits.hits,
})}`
);
const limiter = pLimit(20);
const groupingFieldAnalysis = await Promise.all(
keywordSearchResults.hits.hits.map(async (hit) => {
return limiter(async () => {
const groupValue = hit.fields![groupingField][0] as string;
const analysisForGroupingField = await analyzeDocuments({
esClient,
start,
end,
index,
kuery: getEntityKuery({
[groupingField]: groupValue,
}),
});
const analysisWithRelevantValues = {
...analysisForGroupingField,
fields: analysisForGroupingField.fields
.filter((field) => {
return !field.empty;
})
.map((field) => {
const valuesFoundInEntity = field.values.filter((value) => {
return (
allValuesFromEntity.includes(value) ||
allValuesFromEntity.some((valueFromEntity) => {
return (
typeof valueFromEntity === 'string' &&
typeof value === 'string' &&
(value.includes(valueFromEntity) || valueFromEntity.includes(value))
);
})
);
});
return {
...field,
values: valuesFoundInEntity,
};
}),
};
return {
groupingField,
key: groupValue,
highlight: hit.highlight!,
analysis: sortAndTruncateAnalyzedFields(analysisWithRelevantValues),
};
});
})
);
return groupingFieldAnalysis.map(({ key, highlight, analysis: analysisForGroupingField }) => {
return {
entity: {
[groupingField]: key,
},
highlight,
analysis: analysisForGroupingField,
};
});
}
}
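The field-value filter near the end of `getResultsForGroupingField` keeps only candidate values that overlap with values already seen on the investigated entity, counting substring containment in either direction. A standalone sketch of that predicate, for string values:

```ts
// Standalone sketch of the overlap check used above.
const allValuesFromEntity = ['10.44.0.11:8080', 'cartservice'];

const overlapsEntityValues = (value: string) =>
  allValuesFromEntity.includes(value) ||
  allValuesFromEntity.some(
    (entityValue) => value.includes(entityValue) || entityValue.includes(value)
  );

overlapsEntityValues('10.44.0.11'); // true: substring of '10.44.0.11:8080'
overlapsEntityValues('cartservice-7d9f'); // true: contains 'cartservice'
overlapsEntityValues('paymentservice'); // false: no overlap in either direction
```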


@@ -0,0 +1,159 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import stringify from 'json-stable-stringify';
import pLimit from 'p-limit';
import { RelatedEntityFromSearchResults } from '.';
import {
RCA_PROMPT_DEPENDENCIES,
RCA_PROMPT_ENTITIES,
RCA_SYSTEM_PROMPT_BASE,
} from '../../prompts';
import { RootCauseAnalysisContext } from '../../types';
import { formatEntity } from '../../util/format_entity';
import { getPreviouslyInvestigatedEntities } from '../../util/get_previously_investigated_entities';
import { toBlockquote } from '../../util/to_blockquote';
export interface RelatedEntityDescription {
entity: Record<string, string>;
reason: string;
confidence: string;
}
export async function extractRelatedEntities({
entity,
entityReport,
summaries,
foundEntities,
context,
rcaContext: { events, connectorId, inferenceClient },
}: {
foundEntities: RelatedEntityFromSearchResults[];
entity: Record<string, string>;
entityReport: string;
summaries: string[];
context: string;
rcaContext: Pick<RootCauseAnalysisContext, 'events' | 'connectorId' | 'inferenceClient'>;
}): Promise<{ relatedEntities: RelatedEntityDescription[] }> {
const system = `${RCA_SYSTEM_PROMPT_BASE}
${RCA_PROMPT_ENTITIES}
${RCA_PROMPT_DEPENDENCIES}`;
const previouslyInvestigatedEntities = getPreviouslyInvestigatedEntities({ events });
const previouslyInvestigatedEntitiesPrompt = previouslyInvestigatedEntities.length
? `## Previously investigated entities
${previouslyInvestigatedEntities
.map((prevEntity) => `- ${formatEntity(prevEntity)}`)
.join('\n')}`
: '';
const prompts = summaries.map((summary) => {
return `
# Investigated entity
${formatEntity(entity)}
# Report
${toBlockquote(entityReport)}
# Related entities report
${toBlockquote(summary)}
${previouslyInvestigatedEntitiesPrompt}
# Context
${context}
# Task
Your current task is to extract relevant entities as a data structure from the
related entities report. Order them by relevance to the investigation, put the
most relevant ones first.
`;
});
const limiter = pLimit(5);
const allEvents = await Promise.all(
prompts.map(async (input) => {
const completeEvent = await limiter(() =>
inferenceClient.output({
id: 'get_entity_relationships',
connectorId,
system,
input,
schema: {
type: 'object',
properties: {
related_entities: {
type: 'array',
items: {
type: 'object',
properties: {
entity: {
type: 'object',
properties: {
field: {
type: 'string',
},
value: {
type: 'string',
},
},
required: ['field', 'value'],
},
reason: {
type: 'string',
description: 'Describe why this entity might be relevant. Provide evidence.',
},
confidence: {
type: 'string',
description:
'Describe how confident you are in your conclusion about this relationship: low, moderate, high',
},
},
required: ['entity', 'reason', 'confidence'],
},
},
},
required: ['related_entities'],
} as const,
})
);
return completeEvent.output;
})
);
const foundEntityIds = foundEntities.map(({ entity: foundEntity }) => stringify(foundEntity));
const relatedEntities = allEvents
.flat()
.flatMap((event) => {
return event.related_entities.map((item) => {
return {
entity: { [item.entity.field]: item.entity.value },
reason: item.reason,
confidence: item.confidence,
};
});
})
.filter((item) => {
return foundEntityIds.includes(stringify(item.entity));
});
return {
relatedEntities,
};
}
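The final filter compares entities by their serialized form, and `json-stable-stringify` sorts keys before serializing, so key order can never break the match between what the model returned and what the searches actually found. A small sketch of the difference:

```ts
import stringify from 'json-stable-stringify';

const fromSearch = { 'service.name': 'cart', 'service.environment': 'prod' };
const fromLlm = { 'service.environment': 'prod', 'service.name': 'cart' };

// Plain JSON.stringify preserves insertion order, so these differ:
JSON.stringify(fromSearch) === JSON.stringify(fromLlm); // false
// Stable stringify sorts keys first, so both serialize identically:
stringify(fromSearch) === stringify(fromLlm); // true
```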


@@ -0,0 +1,97 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { InferenceClient } from '@kbn/inference-plugin/server';
import { Logger } from '@kbn/logging';
import {
DocumentAnalysis,
TruncatedDocumentAnalysis,
} from '@kbn/observability-utils-common/llm/log_analysis/document_analysis';
import { ObservabilityElasticsearchClient } from '@kbn/observability-utils-server/es/client/create_observability_es_client';
import { FieldPatternResultWithChanges } from '@kbn/observability-utils-server/entities/get_log_patterns';
import {
analyzeFetchedRelatedEntities,
RelatedEntityFromSearchResults,
} from './analyze_fetched_related_entities';
import {
RelatedEntityKeywordSearch,
writeKeywordSearchForRelatedEntities,
} from './write_keyword_searches_for_related_entities';
import { ScoredKnowledgeBaseEntry } from '../get_knowledge_base_entries';
export type { RelatedEntityFromSearchResults };
export async function findRelatedEntities({
connectorId,
inferenceClient,
start,
end,
index,
esClient,
entity,
analysis,
logger,
context,
ownPatterns,
patternsFromOtherEntities,
kbEntries,
}: {
connectorId: string;
inferenceClient: InferenceClient;
start: number;
end: number;
index: string | string[];
esClient: ObservabilityElasticsearchClient;
entity: Record<string, string>;
analysis: {
truncated: TruncatedDocumentAnalysis;
full: DocumentAnalysis;
};
logger: Logger;
context: string;
ownPatterns: FieldPatternResultWithChanges[];
patternsFromOtherEntities: FieldPatternResultWithChanges[];
kbEntries: ScoredKnowledgeBaseEntry[];
}): Promise<{
searches: RelatedEntityKeywordSearch[];
summaries: string[];
foundEntities: RelatedEntityFromSearchResults[];
}> {
const { groupingFields, searches } = await writeKeywordSearchForRelatedEntities({
connectorId,
inferenceClient,
entity,
analysis: analysis.truncated,
ownPatterns,
context,
kbEntries,
});
const { summaries, foundEntities } = await analyzeFetchedRelatedEntities({
entity,
connectorId,
start,
end,
esClient,
index,
inferenceClient,
searches,
groupingFields,
logger,
analysis,
ownPatterns,
patternsFromOtherEntities,
context,
kbEntries,
});
return {
searches,
summaries,
foundEntities,
};
}


@@ -0,0 +1,199 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { InferenceClient } from '@kbn/inference-plugin/server';
import { TruncatedDocumentAnalysis } from '@kbn/observability-utils-common/llm/log_analysis/document_analysis';
import { FieldPatternResultWithChanges } from '@kbn/observability-utils-server/entities/get_log_patterns';
import { RCA_PROMPT_ENTITIES, RCA_SYSTEM_PROMPT_BASE } from '../../prompts';
import { formatEntity } from '../../util/format_entity';
import { serializeKnowledgeBaseEntries } from '../../util/serialize_knowledge_base_entries';
import { toBlockquote } from '../../util/to_blockquote';
import { ScoredKnowledgeBaseEntry } from '../get_knowledge_base_entries';
const SYSTEM_PROMPT_ADDENDUM = `# Guide: Constructing Keyword Searches to Find Related Entities
When investigating issues like elevated failure rates for a
specific endpoint, you can use the metadata at hand (IP addresses,
URLs, session IDs, tracing IDs, etc.) to build targeted keyword searches.
By extracting meaningful fragments from the data, you can correlate
related services or hosts across distributed systems. Here's how
you can break down the metadata and format your searches.
## Grouping fields
Define grouping fields for the entities you want to extract. For
instance, "service.name" if you are looking for services, or
"kubernetes.pod.name" if you are looking for pods. Focus
on services, unless you are looking for deployment or
configuration changes.
---
## Key Metadata and Search Format
### Example: Investigating a service failure for \`/api/products\`
You can break down various pieces of metadata into searchable
fragments. For each value, include a short description of its
relationship to the investigation. This value will be used
by the system to determine the relevance of a given entity
that matches the search request.
### 1. **IP Address and Port**
- **Fragments:**
- \`"10.44.0.11:8080"\`: Full address.
- \`"10.44.0.11"\`: IP address only.
- \`"8080"\`: Port number.
- **Appears as:** This IP address and port are referenced as
<ip-field-name> and <port-field-name> in the investigated entity
<entity-name>.
### 2. **Outgoing Request URL**
- **Fragments:**
- \`"http://called-service/api/product"\`: Full outgoing URL.
- \`"/api/product*"\`: Endpoint path.
- \`"called-service"\`: Service name of the upstream dependency.
- **Appears as:** These URL fragments appear as attributes.request.url
in the investigated entity <entity-name>. They could appear as referer
in the upstream dependency.
### 3. **Parent and Span IDs**
- **Fragments:**
- \`"000aa"\`: Parent ID.
- \`"000bbb"\`: Span ID.
- **Appears as:** These IDs appear as span.id and parent.id in the
investigated entity <entity-name>. They could be referring to spans
found on upstream or downstream services.
---
## Example Search Format in JSON
To structure your keyword search, format the fragments and their
relationships in a JSON array like this:
\`\`\`json
{
"groupingFields": [ "service.name" ],
"values": [
{
"fragments": [
"10.44.0.11:8080",
"10.44.0.11",
"8080"
],
"appearsAs": "This IP address and port are referenced as <ip-field-name> and <port-field-name> in the investigated entity <entity-name>."
},
{
"fragments": [
"http://<upstream-service>/api/product",
"/api/product",
"<upstream-service>"
],
"relationship": "These URL fragments appear as attributes.request.url in the investigated entity <entity-name>."
},
{
"fragments": [
"000aa",
"000bbb"
],
"relationship": " These ids appear as span.id and parent.id in the investigated entity <entity-name>. They could be referring to spans found on upstream or downstream services"
}
]
}`;
export interface RelatedEntityKeywordSearch {
fragments: string[];
appearsAs: string;
}
export async function writeKeywordSearchForRelatedEntities({
connectorId,
inferenceClient,
entity,
analysis,
ownPatterns,
context,
kbEntries,
}: {
connectorId: string;
inferenceClient: InferenceClient;
entity: Record<string, string>;
analysis: TruncatedDocumentAnalysis;
ownPatterns: FieldPatternResultWithChanges[];
context: string;
kbEntries: ScoredKnowledgeBaseEntry[];
}): Promise<{
groupingFields: string[];
searches: RelatedEntityKeywordSearch[];
}> {
const logPatternsPrompt = ownPatterns.length
? JSON.stringify(
ownPatterns.map((pattern) => ({ regex: pattern.regex, sample: pattern.sample }))
)
: 'No log patterns found';
return inferenceClient
.output({
id: 'extract_keyword_searches',
connectorId,
system: `${RCA_SYSTEM_PROMPT_BASE}
${RCA_PROMPT_ENTITIES}`,
input: `Your current task is to extract keyword searches
to find related entities to the entity ${formatEntity(entity)},
based on the following context:
## Investigation context
${toBlockquote(context)}
${serializeKnowledgeBaseEntries(kbEntries)}
## Data analysis
${JSON.stringify(analysis)}
## Log patterns
${logPatternsPrompt}
## Instructions
${SYSTEM_PROMPT_ADDENDUM}`,
schema: {
type: 'object',
properties: {
groupingFields: {
type: 'array',
items: {
type: 'string',
},
},
searches: {
type: 'array',
items: {
type: 'object',
properties: {
fragments: {
type: 'array',
items: {
type: 'string',
},
},
appearsAs: {
type: 'string',
description:
'Describe in which fields these values appear on the investigated entity. You can mention multiple fields if applicable.',
},
},
required: ['fragments', 'appearsAs'],
},
},
},
required: ['searches', 'groupingFields'],
} as const,
})
.then((event) => event.output);
}


@@ -0,0 +1,96 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { RCA_SYSTEM_PROMPT_BASE } from '../../prompts';
import { RootCauseAnalysisContext } from '../../types';
import { stringifySummaries } from '../../util/stringify_summaries';
type SignificantEventSeverity = 'info' | 'unusual' | 'warning' | 'critical';
type SignificantEventType = 'alert' | 'slo' | 'event';
export interface SignificantEvent {
severity: SignificantEventSeverity;
'@timestamp'?: string;
description: string;
type: SignificantEventType;
}
export interface SignificantEventsTimeline {
events: SignificantEvent[];
}
export async function generateSignificantEventsTimeline({
report,
rcaContext,
}: {
report: string;
rcaContext: RootCauseAnalysisContext;
}): Promise<SignificantEventsTimeline> {
const { connectorId, inferenceClient } = rcaContext;
return await inferenceClient
.output({
id: 'generate_timeline',
system: RCA_SYSTEM_PROMPT_BASE,
connectorId,
input: `Your current task is to generate a timeline
of significant events, based on the given RCA report,
according to a structured schema. This timeline will
be presented to the user as a visualization.
${stringifySummaries(rcaContext)}
# Report
${report}
`,
schema: {
type: 'object',
properties: {
events: {
type: 'array',
items: {
type: 'object',
properties: {
timestamp: {
type: 'string',
description: 'The ISO timestamp of when the event occurred',
},
severity: {
type: 'string',
enum: ['info', 'unusual', 'warning', 'critical'],
},
type: {
type: 'string',
enum: ['alert', 'slo', 'event'],
},
description: {
type: 'string',
description: 'A description of the event',
},
},
required: ['severity', 'description'],
},
},
},
required: ['events'],
} as const,
})
.then((timelineCompleteEvent) => {
return {
events: timelineCompleteEvent.output.events.map((event) => {
return {
'@timestamp': event.timestamp,
severity: event.severity,
type: event.type ?? 'event',
description: event.description,
};
}),
};
});
}
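Only `severity` and `description` are required by the schema above, so the mapping step backfills `type` with `'event'` and passes the optional `timestamp` through as `@timestamp`. A made-up model output and its mapped result:

```ts
// Hypothetical model output, shaped like the schema above.
interface RawTimelineEvent {
  timestamp?: string;
  severity: 'info' | 'unusual' | 'warning' | 'critical';
  type?: 'alert' | 'slo' | 'event';
  description: string;
}

const output: { events: RawTimelineEvent[] } = {
  events: [
    {
      timestamp: '2020-01-01T05:00:00.000Z',
      severity: 'critical',
      type: 'alert',
      description: 'Error rate spiked on the investigated service',
    },
    // The model omitted timestamp and type here:
    { severity: 'info', description: 'Deployment rolled out' },
  ],
};

const timeline = output.events.map((event) => ({
  '@timestamp': event.timestamp,
  severity: event.severity,
  type: event.type ?? 'event',
  description: event.description,
}));
// -> the second event maps to { '@timestamp': undefined, type: 'event', ... }
```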


@@ -0,0 +1,185 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { ShortIdTable } from '@kbn/observability-ai-assistant-plugin/common';
import { decode, encode } from 'gpt-tokenizer';
import { orderBy, sumBy } from 'lodash';
import { RCA_SYSTEM_PROMPT_BASE } from '../../prompts';
import { RootCauseAnalysisContext } from '../../types';
import { formatEntity } from '../../util/format_entity';
import { toBlockquote } from '../../util/to_blockquote';
export interface ScoredKnowledgeBaseEntry {
id: string;
text: string;
tokens: number;
score: number;
truncated?: {
tokens: number;
text: string;
};
}
export async function getKnowledgeBaseEntries({
entity,
context,
rcaContext,
maxTokens: maxTokensForEntries,
}: {
entity: Record<string, string>;
context: string;
rcaContext: RootCauseAnalysisContext;
maxTokens: number;
}): Promise<ScoredKnowledgeBaseEntry[]> {
const response = await rcaContext.observabilityAIAssistantClient.recall({
queries: [
...Object.values(entity).map((value) => ({ text: value, boost: 3 })),
{ text: context },
],
limit: {
tokenCount: Number.MAX_VALUE,
},
});
const { inferenceClient, connectorId } = rcaContext;
const shortIdTable = new ShortIdTable();
const system = RCA_SYSTEM_PROMPT_BASE;
const input = `Re-order the attached documents, based on relevance to the context.
Score them between 1 and 5, based on their relative relevance to each other. The
most relevant doc should be scored 5, and the least relevant doc should be scored
1.
# Entity
${formatEntity(entity)}
# Context
${toBlockquote(context)}
`;
const maxTokensForScoring = rcaContext.tokenLimit - encode(system + input).length - 1_000;
const entriesWithTokens = response.map((entry) => {
return {
id: entry.id,
text: entry.text,
tokens: encode(entry.text),
};
});
const totalTokenCount = sumBy(entriesWithTokens, (entry) => entry.tokens.length);
const truncatedEntriesWithShortIds = entriesWithTokens.map((entry) => {
const tokensForEntry = Math.floor(
(entry.tokens.length / totalTokenCount) * maxTokensForScoring
);
const truncatedText = decode(entry.tokens.slice(0, tokensForEntry));
const isTruncated = tokensForEntry < entry.tokens.length;
return {
id: entry.id,
tokens: entry.tokens,
shortId: shortIdTable.take(entry.id),
text: entry.text,
truncatedText,
isTruncated,
};
});
const scoredEntries = await inferenceClient.output({
id: 'score_entries',
connectorId,
system: RCA_SYSTEM_PROMPT_BASE,
input: `${input}
${truncatedEntriesWithShortIds
.map((entry) => {
return `# ID: ${entry.shortId}
## Text (${entry.isTruncated ? `truncated` : `not truncated`})
${toBlockquote(entry.truncatedText)}
`;
})
.join('\n\n')}
`,
stream: false,
schema: {
type: 'object',
properties: {
docs: {
type: 'array',
items: {
type: 'object',
properties: {
score: {
type: 'number',
description:
'A score between 1 and 5, with 5 being most relevant, and 1 being least relevant',
},
id: {
type: 'string',
},
},
required: ['score', 'id'],
},
},
},
required: ['docs'],
},
} as const);
const scoresById = new Map(scoredEntries.output.docs.map((doc) => [doc.id, doc.score]));
const entriesWithScore = truncatedEntriesWithShortIds.map((entry) => {
const score = scoresById.get(entry.shortId) ?? 0;
return {
...entry,
score,
};
});
const sortedEntries = orderBy(entriesWithScore, (entry) => entry.score, 'desc');
const returnedEntries: ScoredKnowledgeBaseEntry[] = [];
// Track the remaining budget as entries are added; without decrementing it,
// every entry would be compared against the full budget.
let tokensLeft = maxTokensForEntries;
sortedEntries.forEach((entry) => {
  if (entry.tokens.length <= tokensLeft) {
    returnedEntries.push({
      id: entry.id,
      text: entry.text,
      tokens: entry.tokens.length,
      score: entry.score,
    });
    tokensLeft -= entry.tokens.length;
    return;
  }
  const tokensToTake = tokensLeft;
  if (tokensToTake > 0) {
    const tookTokens = entry.tokens.slice(0, tokensToTake);
    tokensLeft -= tookTokens.length;
    returnedEntries.push({
      id: entry.id,
      text: entry.text,
      tokens: entry.tokens.length,
      score: entry.score,
      truncated: {
        text: decode(tookTokens),
        tokens: tookTokens.length,
      },
    });
  }
});
return returnedEntries;
}
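The scoring prompt stays under the connector's token limit by giving each recalled entry a slice of the scoring budget proportional to its length (the `tokensForEntry` computation above). A toy allocation with made-up numbers:

```ts
// Toy numbers: three entries totalling 10,000 tokens, 2,000-token budget.
const entryTokenCounts = [6_000, 3_000, 1_000];
const totalTokenCount = 10_000;
const maxTokensForScoring = 2_000;

const perEntryBudget = entryTokenCounts.map((tokens) =>
  Math.floor((tokens / totalTokenCount) * maxTokensForScoring)
);
// -> [1200, 600, 200]: every entry is truncated, but proportionally, so the
// model still sees a representative slice of each document while ranking.
```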


@@ -0,0 +1,268 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { getEntityKuery } from '@kbn/observability-utils-common/entities/get_entity_kuery';
import { sortAndTruncateAnalyzedFields } from '@kbn/observability-utils-common/llm/log_analysis/sort_and_truncate_analyzed_fields';
import { analyzeDocuments } from '@kbn/observability-utils-server/entities/analyze_documents';
import { getDataStreamsForEntity } from '@kbn/observability-utils-server/entities/get_data_streams_for_entity';
import { getAlertsForEntity } from '@kbn/observability-utils-server/entities/signals/get_alerts_for_entity';
import { getSlosForEntity } from '@kbn/observability-utils-server/entities/signals/get_slos_for_entity';
import { ObservabilityElasticsearchClient } from '@kbn/observability-utils-server/es/client/create_observability_es_client';
import { RootCauseAnalysisContext } from '../../types';
import { stringifySummaries } from '../../util/stringify_summaries';
import { analyzeLogPatterns } from '../analyze_log_patterns';
import { describeEntity } from '../describe_entity';
import { describeLogPatterns } from '../describe_log_patterns';
import { findRelatedEntities } from '../find_related_entities';
import { extractRelatedEntities } from '../find_related_entities/extract_related_entities';
import { writeEntityInvestigationReport } from '../write_entity_investigation_report';
import { EntityInvestigation } from './types';
import { getKnowledgeBaseEntries } from '../get_knowledge_base_entries';
export type { EntityInvestigation };
export interface EntityInvestigationParameters {
entity: Record<string, string>;
rcaContext: RootCauseAnalysisContext;
context: string;
}
export async function investigateEntity(
parameters: EntityInvestigationParameters
): Promise<EntityInvestigation | undefined> {
const {
entity,
rcaContext,
rcaContext: {
inferenceClient,
connectorId,
start,
end,
esClient,
logger: parentLogger,
indices,
},
context,
} = parameters;
const kuery = getEntityKuery(entity);
const logger = parentLogger.get('investigateEntity');
logger.debug(() => `Investigating entity: ${JSON.stringify(parameters.entity)}`);
const kbPromise = getKnowledgeBaseEntries({
entity,
context,
rcaContext,
maxTokens: 4_000,
}).catch((error) => {
logger.error(`Could not fetch entries from knowledge base`);
logger.error(error);
return [];
});
const [{ dataStreams }, alerts, slos] = await getSignals({ ...parameters, kuery });
logger.debug(
() =>
`Signals for entity ${JSON.stringify(entity)}: ${dataStreams.length} data streams, ${
alerts.length
} alerts, ${slos.length} slos`
);
if (!dataStreams.length) {
return undefined;
}
const fullAnalysis = await analyzeDataStreamsForEntity({
start,
end,
esClient,
kuery,
dataStreams,
});
const truncatedAnalysis = sortAndTruncateAnalyzedFields(fullAnalysis);
const kbEntries = await kbPromise;
const { ownPatterns, patternsFromOtherEntities } = await analyzeLogPatterns({
allAnalysis: [{ index: dataStreams, analysis: truncatedAnalysis }],
entity,
system: stringifySummaries(rcaContext),
cutoff: {
significance: 'high',
},
rcaContext,
kbEntries,
});
logger.trace(
() => `Analyzed log patterns: ${JSON.stringify({ ownPatterns, patternsFromOtherEntities })}`
);
const entityReportPromise = Promise.all([
describeEntity({
inferenceClient,
analysis: truncatedAnalysis,
connectorId,
contextForEntityInvestigation: context,
entity,
ownPatterns,
kbEntries,
}),
describeLogPatterns({
analysis: truncatedAnalysis,
connectorId,
contextForEntityInvestigation: context,
entity,
inferenceClient,
ownPatterns,
patternsFromOtherEntities,
kbEntries,
}),
]).then(([entityDescription, logPatternDescription]) => {
return writeEntityInvestigationReport({
connectorId,
inferenceClient,
entityDescription,
logPatternDescription,
contextForEntityInvestigation: context,
entity,
}).then((report) => {
return {
description: entityDescription,
logPatternDescription,
report,
};
});
});
const [entityReport, relatedEntitiesResults] = await Promise.all([
entityReportPromise,
findRelatedEntities({
connectorId,
end,
entity,
esClient,
index: indices.logs,
inferenceClient,
logger,
start,
context,
analysis: {
full: fullAnalysis,
truncated: truncatedAnalysis,
},
ownPatterns,
patternsFromOtherEntities,
kbEntries,
}).then(async ({ searches, summaries, foundEntities }) => {
const report = await entityReportPromise;
const { relatedEntities } = await extractRelatedEntities({
entityReport: report.report,
summaries,
entity,
foundEntities,
context,
rcaContext,
});
return {
relatedEntities,
foundEntities,
searches,
summaries,
};
}),
]);
return {
entity,
summary: [
entityReport.description,
entityReport.logPatternDescription.content,
entityReport.report,
].join('\n\n'),
relatedEntities: relatedEntitiesResults.relatedEntities,
attachments: {
alerts,
slos,
analysis: truncatedAnalysis,
ownPatterns,
patternsFromOtherEntities,
searches: relatedEntitiesResults.searches,
relatedEntitiesSummaries: relatedEntitiesResults.summaries,
kbEntries,
},
};
}
async function getSignals({
entity,
kuery,
rcaContext: { start, end, esClient, rulesClient, alertsClient, indices, spaceId },
}: {
kuery: string;
entity: Record<string, unknown>;
rcaContext: Pick<
RootCauseAnalysisContext,
'start' | 'end' | 'esClient' | 'rulesClient' | 'alertsClient' | 'indices' | 'spaceId'
>;
}) {
return await Promise.all([
getDataStreamsForEntity({
esClient,
kuery,
index: indices.logs.concat(indices.traces),
}),
getAlertsForEntity({ entity, rulesClient, alertsClient, start, end, size: 10 }).then(
(alertsResponse) => {
return alertsResponse.hits.hits.map((hit) => hit._source!);
}
),
getSlosForEntity({
entity,
start,
end,
esClient,
size: 1000,
sloSummaryIndices: indices.sloSummaries,
spaceId,
}).then((slosResponse) => {
return slosResponse.hits.hits.map((hit) => hit._source);
}),
]);
}
async function analyzeDataStreamsForEntity({
start,
end,
dataStreams,
esClient,
kuery,
}: {
start: number;
end: number;
kuery: string;
dataStreams: string[];
esClient: ObservabilityElasticsearchClient;
}) {
const analysis = await analyzeDocuments({
esClient,
start,
end,
index: dataStreams,
kuery,
});
return {
...analysis,
fields: analysis.fields.filter((field) => !field.empty),
};
}
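Worth seeing in isolation is the concurrency shape of `investigateEntity`: the knowledge-base recall starts first and is only awaited once the document analysis is done, and the related-entities branch re-awaits the already in-flight report promise instead of producing a second report. A stripped-down sketch, where every function is a hypothetical stand-in and only the await structure mirrors the code above:

```ts
// Hypothetical stand-ins; only the await structure mirrors investigateEntity.
const fetchKnowledgeBase = async () => ['kb entry'];
const analyzeData = async () => ({ fields: [] as string[] });
const writeReport = async (_analysis: unknown, _kb: string[]) => 'report';
const findRelated = async (_analysis: unknown) => ['found entity'];
const extractRelated = async (_report: string, found: string[]) => found;

async function investigateSketch() {
  const kbPromise = fetchKnowledgeBase().catch(() => []); // started early, never throws
  const analysis = await analyzeData(); // runs while the recall is in flight
  const kbEntries = await kbPromise;

  const reportPromise = writeReport(analysis, kbEntries);
  const [report, relatedEntities] = await Promise.all([
    reportPromise,
    findRelated(analysis).then(async (found) => {
      const finishedReport = await reportPromise; // reused, not recomputed
      return extractRelated(finishedReport, found);
    }),
  ]);
  return { report, relatedEntities };
}
```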


@@ -0,0 +1,22 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { formatEntity } from '../../util/format_entity';
import { toBlockquote } from '../../util/to_blockquote';
export const getInvestigateEntityTaskPrompt = ({
entity,
contextForEntityInvestigation,
}: {
entity: Record<string, string>;
contextForEntityInvestigation: string;
}) => `## Entity-Based Investigation: Task Guide
In the investigation process, you are currently investigating the entity
${formatEntity(entity)}. The context given for this investigation is:
${toBlockquote(contextForEntityInvestigation)}`;


@@ -0,0 +1,31 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import type { ParsedTechnicalFields } from '@kbn/rule-registry-plugin/common';
import type { TruncatedDocumentAnalysis } from '@kbn/observability-utils-common/llm/log_analysis/document_analysis';
import type { AnalyzeLogPatternOutput } from '../analyze_log_patterns';
import type { RelatedEntityDescription } from '../find_related_entities/extract_related_entities';
import type { RelatedEntityKeywordSearch } from '../find_related_entities/write_keyword_searches_for_related_entities';
import type { ScoredKnowledgeBaseEntry } from '../get_knowledge_base_entries';
export interface EntityInvestigation {
entity: Record<string, string>;
summary: string;
relatedEntities: RelatedEntityDescription[];
attachments: {
analysis: TruncatedDocumentAnalysis;
slos: Array<
Record<string, any> & {
status: 'VIOLATED' | 'DEGRADED' | 'HEALTHY' | 'NO_DATA';
}
>;
alerts: ParsedTechnicalFields[];
searches: RelatedEntityKeywordSearch[];
relatedEntitiesSummaries: string[];
kbEntries: ScoredKnowledgeBaseEntry[];
} & AnalyzeLogPatternOutput;
}


@@ -0,0 +1,239 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { MessageRole } from '@kbn/inference-common';
import { RCA_OBSERVE_TOOL_NAME } from '@kbn/observability-ai-common/root_cause_analysis';
import { RCA_PROMPT_CHANGES, RCA_PROMPT_ENTITIES, RCA_SYSTEM_PROMPT_BASE } from '../../prompts';
import { ObservationToolMessage, RootCauseAnalysisContext } from '../../types';
import { formatEntity } from '../../util/format_entity';
import { getPreviouslyInvestigatedEntities } from '../../util/get_previously_investigated_entities';
import { stringifySummaries } from '../../util/stringify_summaries';
import { toBlockquote } from '../../util/to_blockquote';
import { EntityInvestigation } from '../investigate_entity/types';
const INITIAL_OBSERVATION_TASK_GUIDE = `Your current task is to write observations based on the initial context. You
should acknowledge the context briefly, and mention key observations from the
initial context.
Then, briefly describe what change you are looking for. Are the symptoms:
- rapid, or gradual onset?
- subtle or pronounced?
If possible, mention the time of the change.
When considering the initial context, reason about relevant changes to observe,
such as short-lived versus persistent changes or singular events, like scale
events, rollouts, or configuration changes.
After, taking into account the capabilities you have, plan for next steps.
Describe the next step, which is to investigate the entity found in the initial
context. Only mention the entity (as a field/value). Do not mention any
additional filters.
Be brief, accurate, and critical.`;
const INVESTIGATION_ADDENDUM = `
**Task Guide: Observe the investigation results**
You will receive one or more investigations. These investigations mention:
- a general characterization of the entity based on its data
- relevant log patterns
- other signals, like SLOs or alerts
- possibly related entities, and investigation suggestions
First, you should briefly acknowledge the initial context of the investigation
and where it stands.
Next, you should note key observations from the investigations, and how they relate
to the ongoing investigation.
After, you should generate a timeline of significant events. For this timeline,
include events from previous observations. Additionally, include significant
events from the inspected investigations. Group events together in a topic
if needed. Significant events are things like: an increase in errors, deployment
events, a drop to zero for access logs, etc. In most cases, you do not want to
mention individual log messages, unless it is a particularly significant event
by itself.
For each event, mention:
- the timestamp of the event
- the nature of the change, if applicable
- data from the event, such as specific log patterns, alerts or slos
- the meaning of the event and how it is related to the initial context
Do not include:
- the time range from the investigation itself (start/end)
- other events that occurred during the investigation itself, like running
log analysis or other patterns
## Correlating significant events
When correlating significant events, pay close attention to the timestamp of
the mentioned change, and how it correlates to the timestamp of the change you
want to correlate it to, such as the start time of an alert. An alert might be
delayed, but if you see many changes clustered around a specific timestamp and
some that are significantly earlier or later, the latter group is likely not
relevant.
## Context and reasoning
Next, use the timeline of events and the new observations to revise your
analysis of the initial context and the ongoing investigation. Reason about
how changes could be related: are they close in time, or far removed, compared
to others? Is the type of change similar? Is the magnitude of the change similar?`;
const SUGGEST_NEXT_STEPS_PROMPT = `
Next, consider next steps. It's always important to contextualize the significant
events in the initial context of the investigation. Focus on your strongest pieces of
evidence. Your observations should be related to finding out the cause of the
initial context of the investigation - you should not concern yourself with the
impact on _other_ entities.
Suggest concluding the process when:
- there is a clear and obvious root cause
- you have investigated more than 10 entities
- OR you cannot find any unhealthy entities
- there are no more entities to investigate
If the conclusion is you need to continue your investigation, mention the entities
that should be investigated. Do this only if there is a significant chance that one of
the related entities will give you new insights into the root cause (instead of
just the impact). DO NOT investigate an entity more than once.`;
const CONCLUDE_PROCESS_PROMPT = `
You must suggest concluding the process and writing the final report, as your
capabilities do not allow you to investigate more entities.`;
function getInitialPrompts(initialContext: string) {
return {
system: `${RCA_SYSTEM_PROMPT_BASE}
${RCA_PROMPT_ENTITIES}
${RCA_PROMPT_CHANGES}`,
input: `## Context
${initialContext}
${INITIAL_OBSERVATION_TASK_GUIDE}`,
};
}
function getObserveInvestigationsPrompts({
investigations,
summaries,
rcaContext,
}: {
investigations: EntityInvestigation[];
summaries: ObservationStepSummary[];
rcaContext: RootCauseAnalysisContext;
}) {
const previouslyInvestigatedEntities = getPreviouslyInvestigatedEntities(rcaContext);
const canContinue =
summaries.length <= 5 &&
investigations.filter((investigation) => 'summary' in investigation).length <= 10;
const investigationsPrompt = `Observe the following investigations that recently concluded:
${investigations
.map((investigation, index) => {
return `## ${index + 1}: investigation of ${formatEntity(investigation.entity)}
${toBlockquote(investigation.summary)}
${
investigation.relatedEntities.length
? `### Relationships to ${formatEntity(investigation.entity)}
${toBlockquote(JSON.stringify(investigation.relatedEntities))}
`
: ``
}
`;
})
.join('\n\n')}
${INVESTIGATION_ADDENDUM}
${
canContinue
? `${SUGGEST_NEXT_STEPS_PROMPT}
${
previouslyInvestigatedEntities.length
? `The following entities have been investigated previously.
Do not investigate them again:
${previouslyInvestigatedEntities.map((entity) => `- ${JSON.stringify(entity)}`).join('\n')}`
: ``
}
`
: CONCLUDE_PROCESS_PROMPT
}
`;
const systemPrompt = `${RCA_SYSTEM_PROMPT_BASE}
${RCA_PROMPT_ENTITIES}
${stringifySummaries(rcaContext)}`;
return {
system: systemPrompt,
input: investigationsPrompt,
};
}
export interface ObservationStepSummary {
investigations: EntityInvestigation[];
content: string;
}
export function observeInvestigationResults({
rcaContext,
rcaContext: { logger, events, initialContext, inferenceClient, connectorId },
investigations,
}: {
rcaContext: RootCauseAnalysisContext;
investigations: EntityInvestigation[];
}): Promise<ObservationStepSummary> {
const summaries = events
.filter((event): event is ObservationToolMessage => {
return event.role === MessageRole.Tool && event.name === RCA_OBSERVE_TOOL_NAME;
})
.map((event) => event.data);
logger.debug(
() =>
`Observing ${investigations.length} investigations (${summaries.length} previous summaries)`
);
const { system, input } = investigations.length
? getObserveInvestigationsPrompts({ summaries, investigations, rcaContext })
: getInitialPrompts(initialContext);
return inferenceClient
.output({
id: 'observe',
system,
input,
connectorId,
})
.then((outputCompleteEvent) => {
return {
content: outputCompleteEvent.content,
investigations,
};
});
}

View file

@ -0,0 +1,84 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { InferenceClient } from '@kbn/inference-plugin/server';
import { RCA_PROMPT_SIGNIFICANT_EVENTS, RCA_SYSTEM_PROMPT_BASE } from '../../prompts';
import { formatEntity } from '../../util/format_entity';
import { toBlockquote } from '../../util/to_blockquote';
import { LogPatternDescription } from '../describe_log_patterns';
import { getInvestigateEntityTaskPrompt } from '../investigate_entity/prompts';
export async function writeEntityInvestigationReport({
inferenceClient,
connectorId,
entity,
contextForEntityInvestigation,
entityDescription,
logPatternDescription,
}: {
inferenceClient: InferenceClient;
connectorId: string;
entity: Record<string, string>;
contextForEntityInvestigation: string;
entityDescription: string;
logPatternDescription: LogPatternDescription;
}): Promise<string> {
const system = RCA_SYSTEM_PROMPT_BASE;
const shouldGenerateTimeline = logPatternDescription.interestingPatternCount > 0;
let input = `${getInvestigateEntityTaskPrompt({ entity, contextForEntityInvestigation })}
## Entity description
${toBlockquote(entityDescription)}
## Log pattern analysis
${toBlockquote(logPatternDescription.content)}
# Current task
Your current task is to write a report of the investigation into ${formatEntity(entity)}.
The log pattern analysis and entity description will be added to your report (at the
top), so you don't need to repeat anything in it.`;
if (shouldGenerateTimeline) {
input += `\n\n${RCA_PROMPT_SIGNIFICANT_EVENTS}`;
}
input += `\n\n## Context and reasoning
Reason about the role that the entity plays in the investigation, given the context.
Mention evidence (hard pieces of data) when reasoning.
Do not suggest next steps - this will happen in a follow-up task.`;
if (shouldGenerateTimeline) {
input += `\n\n## Format
Your reply should only contain two sections:
- Timeline of significant events
- Context and reasoning
`;
} else {
input += `\n\n## Format
Your reply should only contain one section:
- Context and reasoning
`;
}
const response = await inferenceClient.output({
id: 'generate_entity_report',
connectorId,
input,
system,
});
return response.content;
}

View file

@ -0,0 +1,191 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { RCA_PROMPT_TIMELINE_GUIDE, RCA_SYSTEM_PROMPT_BASE } from '../../prompts';
import { RootCauseAnalysisContext } from '../../types';
import { stringifySummaries } from '../../util/stringify_summaries';
const SYSTEM_PROMPT_ADDENDUM = `
# Guide: Writing a Root Cause Analysis (RCA) Report
A Root Cause Analysis (RCA) report is the final step in a thorough
investigation. Its purpose is to provide a clear, evidence-backed explanation of
the underlying cause of an issue, as well as the impact. Even if no definitive
root cause is identified, the report should reflect the findings, the hypotheses
considered, and why certain assumptions were rejected. This guide will help
structure an RCA that distinguishes between cause and effect, organizes
evidence, and presents a timeline of key events.
---
## 1. Introduction
Start by summarizing the reason for the investigation. Provide a brief overview
of the incident, the affected services or entities, and the initial alerts or
issues that triggered the investigation.
- **What prompted the investigation?**
- **Which entities were investigated?**
- **Was there a specific hypothesis proposed at the outset?**
### Example:
- **Overview:** This RCA report investigates the elevated error rates in
\`myservice\` and its downstream dependencies, first identified through an SLO
breach for the \`/api/submit\` endpoint. The investigation considered multiple
entities and possible causes, including resource exhaustion and upstream service
failures.
---
## 2. Investigation Summary
Summarize the key steps of the investigation, outlining:
- **What hypotheses were proposed and why.**
- **Which entities were investigated (e.g., \`myservice\`, \`myotherservice\`,
\`notification-service\`).**
- **Which hypotheses were discarded and why.**
For each hypothesis, present the supporting or contradicting evidence.
- **Strong Indicators:** Clear, repeated evidence pointing toward or against a
hypothesis.
- **Weak Indicators:** Inconsistent or ambiguous data that did not provide
conclusive answers.
#### Example Format:
- **Hypothesis 1:** Resource exhaustion in \`myservice\` caused elevated error
rates.
- **Evidence:**
- **Strong:** Memory usage exceeded 90% during the incident.
- **Weak:** CPU usage remained stable, making resource exhaustion a partial
explanation.
- **Hypothesis 2:** Upstream latency from \`myotherservice\` caused delays.
- **Evidence:**
- **Strong:** API logs showed frequent retries and timeouts from
\`myotherservice\`.
- **Weak:** No errors were observed in \`myotherservice\` logs, suggesting an
issue isolated to \`myservice\`.
---
## 3. Cause and Effect
Differentiate between the **cause** (what initiated the issue) and the
**effect** (the impact or symptoms seen across the system). The cause should
focus on the root, while the effect describes the wider system response or
failure.
- **Root Cause:** Identify the underlying problem, supported by strong evidence.
If no root cause is found, clearly state that the investigation did not lead to
a conclusive root cause.
- **Impact:** Describe the downstream effects on other services, performance
degradation, or SLO violations.
#### Example:
- **Cause:** The root cause of the elevated error rate was identified as a
memory leak in \`myservice\` that gradually led to resource exhaustion.
- **Effect:** This led to elevated latency and increased error rates at the
\`/api/submit\` endpoint, impacting downstream services like
\`notification-service\` that rely on responses from \`myservice\`.
---
## 4. Evidence for Root Cause
Present a structured section summarizing all the evidence that supports the
identified root cause. If no root cause is identified, outline the most
significant findings that guided or limited the investigation.
- **Log Patterns:** Describe any abnormal log patterns observed, including
notable change points.
- **Alerts and SLOs:** Mention any alerts or breached SLOs that were triggered,
including their relevance to the investigation.
- **Data Analysis:** Include any data trends or patterns that were analyzed
(e.g., resource usage spikes, network traffic).
#### Example:
- **Memory Usage:** Logs showed a steady increase in memory consumption starting
at 10:00 AM, peaking at 12:00 PM, where memory usage surpassed 90%, triggering
the alert.
- **Error Rate Logs:** Error rates for \`/api/submit\` began increasing around
11:30 AM, correlating with the memory pressure in \`myservice\`.
- **API Logs:** \`myotherservice\` API logs showed no internal errors, ruling out
an upstream dependency as the primary cause.
---
## 5. Proposed Impact
Even if the root cause is clear, it is important to mention the impact of the
issue on the system, users, and business operations. This includes:
- **Affected Services:** Identify the services impacted (e.g., downstream
dependencies).
- **Performance Degradation:** Describe any SLO breaches or performance
bottlenecks.
- **User Impact:** Explain how users or clients were affected (e.g., higher
latency, failed transactions).
#### Example:
- **Impact:** The memory leak in \`myservice\` caused service degradation over a
2-hour window. This affected \`/api/submit\`, causing delays and failed
requests, ultimately impacting user-facing services relying on that endpoint.
---
## 6. Timeline of Significant Events
${RCA_PROMPT_TIMELINE_GUIDE}
---
## 7. Conclusion and Next Steps
Summarize the conclusions of the investigation:
- If a root cause was identified, confirm it with the strongest supporting
evidence.
- If no root cause was found, state that clearly and suggest areas for further
investigation or monitoring.
Finally, outline the next steps:
- **Fixes or Mitigations:** Recommend any immediate actions (e.g., patch
deployment, configuration changes).
- **Monitoring Improvements:** Suggest new alerts or monitoring metrics based on
lessons learned.
- **Further Investigations:** If necessary, propose any follow-up investigations
to gather more evidence.
#### Example:
- **Conclusion:** The root cause of the incident was a memory leak in
\`myservice\`, leading to resource exhaustion and elevated error rates at
\`/api/submit\`. The leak has been patched, and monitoring has been improved to
detect memory spikes earlier.
- **Next Steps:** Monitor memory usage for the next 24 hours to ensure no
recurrence. Investigate adding a memory ceiling for \`myservice\` to prevent
future resource exhaustion.`;
export async function writeFinalReport({
rcaContext,
}: {
rcaContext: RootCauseAnalysisContext;
}): Promise<string> {
const { inferenceClient, connectorId } = rcaContext;
return await inferenceClient
.output({
id: 'write_final_report',
connectorId,
system: `${RCA_SYSTEM_PROMPT_BASE}
${SYSTEM_PROMPT_ADDENDUM}`,
input: `Write the RCA report, based on the observations.
${stringifySummaries(rcaContext)}`,
})
.then((event) => event.content);
}

View file

@ -0,0 +1,77 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import {
RCA_END_PROCESS_TOOL_NAME,
RCA_INVESTIGATE_ENTITY_TOOL_NAME,
RCA_OBSERVE_TOOL_NAME,
} from '@kbn/observability-ai-common/root_cause_analysis/tool_names';
export const RCA_TOOLS = {
[RCA_OBSERVE_TOOL_NAME]: {
description: `Request an observation from another agent on
the results of the returned investigations. The title should
cover key new observations from the initial context or
completed investigations, not anything about next steps.`,
schema: {
type: 'object',
properties: {
title: {
type: 'string',
description: `A short title with the key new observations that will be displayed on top of a collapsible panel.`,
},
},
required: ['title'],
},
},
[RCA_END_PROCESS_TOOL_NAME]: {
description: `End the RCA process by requesting a
written report from another agent`,
schema: {
type: 'object',
properties: {
endProcess: {
type: 'boolean',
},
},
required: ['endProcess'],
},
},
[RCA_INVESTIGATE_ENTITY_TOOL_NAME]: {
description: `Investigate an entity`,
schema: {
type: 'object',
properties: {
context: {
type: 'string',
description: `Context that will be used in the investigation of the entity. Mention the initial context
of the investigation, a very short summary of the last observation if applicable, and pieces
of data that can be relevant for the investigation into the entity, such as timestamps or
keywords`,
},
entity: {
type: 'object',
description: `The entity you want to investigate, such as a service. Use
the Elasticsearch field names and values. For example, for services, use
the following structure: ${JSON.stringify({
entity: { field: 'service.name', value: 'opbeans-java' },
})}`,
properties: {
field: {
type: 'string',
},
value: {
type: 'string',
},
},
required: ['field', 'value'],
},
},
required: ['context', 'entity'],
},
},
} as const;
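// Illustrative only (not part of the original file): a tool call that
// satisfies the investigate-entity schema above. The ids and values are
// hypothetical; only the shape of `function.name` and `function.arguments`
// is dictated by the schema.
// {
//   toolCallId: 'call-1',
//   function: {
//     name: RCA_INVESTIGATE_ENTITY_TOOL_NAME,
//     arguments: {
//       context: 'Elevated error rates on /api/submit since 11:30; inspect the upstream dependency',
//       entity: { field: 'service.name', value: 'opbeans-java' },
//     },
//   },
// }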

View file

@ -0,0 +1,101 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import {
ToolMessage,
UserMessage,
ToolCallsOf,
ToolChoice,
AssistantMessageOf,
} from '@kbn/inference-common';
import { InferenceClient } from '@kbn/inference-plugin/server';
import { Logger } from '@kbn/logging';
import { AlertsClient } from '@kbn/rule-registry-plugin/server';
import { RulesClient } from '@kbn/alerting-plugin/server';
import { ObservabilityAIAssistantClient } from '@kbn/observability-ai-assistant-plugin/server';
import { ObservabilityElasticsearchClient } from '@kbn/observability-utils-server/es/client/create_observability_es_client';
import {
RCA_END_PROCESS_TOOL_NAME,
RCA_INVESTIGATE_ENTITY_TOOL_NAME,
RCA_OBSERVE_TOOL_NAME,
} from '@kbn/observability-ai-common/root_cause_analysis';
import { ObservationStepSummary } from './tasks/observe_investigation_results';
import { EntityInvestigation } from './tasks/investigate_entity';
import { SignificantEventsTimeline } from './tasks/generate_timeline';
import { RCA_TOOLS } from './tools';
export type EndProcessToolMessage = ToolMessage<
typeof RCA_END_PROCESS_TOOL_NAME,
{
report: string;
timeline: SignificantEventsTimeline;
}
>;
export type ObservationToolMessage = ToolMessage<
typeof RCA_OBSERVE_TOOL_NAME,
{
content: string;
},
ObservationStepSummary
>;
export type InvestigateEntityToolMessage = ToolMessage<
typeof RCA_INVESTIGATE_ENTITY_TOOL_NAME,
Pick<EntityInvestigation, 'entity' | 'summary' | 'relatedEntities'>,
{ attachments: EntityInvestigation['attachments'] }
>;
export type ToolErrorMessage = ToolMessage<
'error',
{
error: {
message: string;
};
}
>;
export type RootCauseAnalysisEvent =
| RootCauseAnalysisToolMessage
| ToolErrorMessage
| UserMessage
| AssistantMessageOf<{
tools: typeof RCA_TOOLS;
toolChoice?: ToolChoice<keyof typeof RCA_TOOLS>;
}>;
export type RootCauseAnalysisToolRequest<
TToolName extends keyof typeof RCA_TOOLS = keyof typeof RCA_TOOLS
> = ToolCallsOf<{
tools: Pick<typeof RCA_TOOLS, TToolName>;
}>['toolCalls'][number];
export type RootCauseAnalysisToolMessage =
| EndProcessToolMessage
| InvestigateEntityToolMessage
| ObservationToolMessage;
export interface RootCauseAnalysisContext {
initialContext: string;
start: number;
end: number;
events: RootCauseAnalysisEvent[];
indices: {
logs: string[];
traces: string[];
sloSummaries: string[];
};
inferenceClient: InferenceClient;
tokenLimit: number;
connectorId: string;
esClient: ObservabilityElasticsearchClient;
alertsClient: AlertsClient;
rulesClient: RulesClient;
logger: Logger;
spaceId: string;
observabilityAIAssistantClient: ObservabilityAIAssistantClient;
}

View file

@ -0,0 +1,177 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import {
Message,
ToolDefinition,
ToolChoice,
ToolCallsOf,
withoutChunkEvents,
withoutTokenCountEvents,
ToolMessage,
MessageOf,
MessageRole,
} from '@kbn/inference-common';
import { InferenceClient } from '@kbn/inference-plugin/server';
import { Logger } from '@kbn/logging';
import {
defer,
last,
merge,
Observable,
of,
OperatorFunction,
share,
switchMap,
toArray,
} from 'rxjs';
interface CallToolOptions extends CallToolTools {
system: string;
messages: Message[];
inferenceClient: InferenceClient;
connectorId: string;
logger: Logger;
}
interface CallToolTools {
tools: Record<string, ToolDefinition>;
toolChoice?: ToolChoice;
}
type CallbackOf<
TCallToolTools extends CallToolTools,
TEmittedMessage extends Message
> = (parameters: {
messages: Message[];
toolCalls: ToolCallsOf<TCallToolTools>['toolCalls'];
}) => Observable<TEmittedMessage>;
type GetNextRequestCallback<TCallToolTools extends CallToolTools> = ({
messages,
system,
}: {
messages: Message[];
system: string;
}) => { system: string; messages: Message[] } & TCallToolTools;
export function callTools<TCallToolOptions extends CallToolOptions>(
{ system, messages, inferenceClient, connectorId, tools, toolChoice, logger }: TCallToolOptions,
callback: CallbackOf<TCallToolOptions, ToolMessage>
): Observable<MessageOf<TCallToolOptions>>;
export function callTools<
TCallToolOptions extends Omit<CallToolOptions, 'tools' | 'toolChoice'> = never,
TCallToolTools extends CallToolTools = never,
TEmittedMessage extends Message = never
>(
options: TCallToolOptions,
getNextRequest: GetNextRequestCallback<TCallToolTools>,
callback: CallbackOf<TCallToolTools, TEmittedMessage>
): Observable<TEmittedMessage>;
export function callTools(
{ system, messages, inferenceClient, connectorId, tools, toolChoice, logger }: CallToolOptions,
...callbacks:
| [GetNextRequestCallback<CallToolTools>, CallbackOf<CallToolOptions, ToolMessage>]
| [CallbackOf<CallToolTools, ToolMessage>]
): Observable<Message> {
const callback = callbacks.length === 2 ? callbacks[1] : callbacks[0];
const getNextRequest =
callbacks.length === 2
? callbacks[0]
: (next: { messages: Message[]; system: string }) => {
return {
...next,
tools,
toolChoice,
};
};
const nextRequest = getNextRequest({ system, messages });
const chatComplete$ = defer(() =>
inferenceClient.chatComplete({
connectorId,
stream: true,
...nextRequest,
})
);
const asCompletedMessages$ = chatComplete$.pipe(
withoutChunkEvents(),
withoutTokenCountEvents(),
switchMap((event) => {
return of({
role: MessageRole.Assistant as const,
content: event.content,
toolCalls: event.toolCalls,
});
})
);
const withToolResponses$ = asCompletedMessages$
.pipe(
switchMap((message) => {
if (message.toolCalls.length) {
return merge(
of(message),
callback({ toolCalls: message.toolCalls, messages: messages.concat(message) })
);
}
return of(message);
})
)
.pipe(handleNext());
return withToolResponses$;
function handleNext(): OperatorFunction<Message, Message> {
return (source$) => {
const shared$ = source$.pipe(share());
const next$ = merge(
shared$,
shared$.pipe(
toArray(),
last(),
switchMap((nextMessages) => {
logger.debug(() =>
JSON.stringify(
nextMessages.map((message) => {
return {
role: message.role,
toolCalls: 'toolCalls' in message ? message.toolCalls : undefined,
toolCallId: 'toolCallId' in message ? message.toolCallId : undefined,
};
})
)
);
if (nextMessages[nextMessages.length - 1].role !== MessageRole.Assistant) {
const options: CallToolOptions = {
system,
connectorId,
inferenceClient,
messages: messages.concat(nextMessages),
tools,
toolChoice,
logger,
};
const after$ = callTools(options, getNextRequest, callback);
return after$;
}
return of();
})
)
);
return next$;
};
}
}
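// A minimal usage sketch (illustrative, not part of the original file): the
// `lookup` tool and its handler are hypothetical. `callTools` resolves each
// round of tool calls via the callback and recurses until the assistant
// replies without tool calls.
function exampleCallToolsUsage({
inferenceClient,
connectorId,
logger,
}: Pick<CallToolOptions, 'inferenceClient' | 'connectorId' | 'logger'>) {
return callTools(
{
system: 'You are a helpful assistant.',
messages: [{ role: MessageRole.User, content: 'Check the status of opbeans-java' }],
inferenceClient,
connectorId,
logger,
tools: {
lookup: {
description: 'Look up the status of a service',
schema: {
type: 'object',
properties: { serviceName: { type: 'string' } },
required: ['serviceName'],
},
},
},
},
({ toolCalls }) =>
// Resolve every tool call with a (hypothetical) tool message.
of(
...toolCalls.map((toolCall) => ({
role: MessageRole.Tool as const,
name: toolCall.function.name,
toolCallId: toolCall.toolCallId,
response: { status: 'healthy' },
}))
)
);
}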

View file

@ -0,0 +1,97 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { encode } from 'gpt-tokenizer';
import { uniqueId } from 'lodash';
interface TextWithId {
id: string;
text: string;
}
interface Parameters {
system: string;
input: string;
tokenLimit: number;
}
interface ChunkedOutputRequest {
input: string;
system: string;
}
export function chunkOutputCalls({}: Parameters & { texts: string[] }): Array<
ChunkedOutputRequest & {
texts: string[];
}
>;
export function chunkOutputCalls({}: Parameters & { texts: TextWithId[] }): Array<
ChunkedOutputRequest & {
texts: TextWithId[];
}
>;
export function chunkOutputCalls({
system,
input,
texts,
tokenLimit,
}: Parameters & {
texts: string[] | TextWithId[];
}) {
const inputAndSystemPromptCount = encode(system).length + encode(input).length;
if (!texts.length) {
return [{ system, input, texts: [] }];
}
const textWithIds = texts.map((text) => {
if (typeof text === 'string') {
return {
id: uniqueId(),
text,
};
}
return text;
});
const textsWithCount = textWithIds.map(({ text, id }) => ({
tokenCount: encode(text).length,
text,
id,
}));
const chunks: Array<{ tokenCount: number; texts: TextWithId[] }> = [];
textsWithCount.forEach(({ text, id, tokenCount }) => {
let chunkWithRoomLeft = chunks.find((chunk) => {
return chunk.tokenCount + tokenCount <= tokenLimit;
});
if (!chunkWithRoomLeft) {
chunkWithRoomLeft = { texts: [], tokenCount: inputAndSystemPromptCount };
chunks.push(chunkWithRoomLeft);
}
chunkWithRoomLeft.texts.push({ text, id });
chunkWithRoomLeft.tokenCount += tokenCount;
});
const hasTextWithIds = texts.some((text) => typeof text !== 'string');
return chunks.map((chunk) => {
const textsForChunk = hasTextWithIds
? chunk.texts
: chunk.texts.map((text) => (typeof text === 'string' ? text : text.text));
return {
system,
input,
texts: textsForChunk,
};
});
}
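// Illustrative usage (not part of the original file): fan an output call out
// over texts that would otherwise exceed the model's context window. The
// texts and token limit below are hypothetical.
function exampleChunkUsage() {
return chunkOutputCalls({
system: 'You analyze log patterns.',
input: 'Summarize the following log patterns.',
texts: ['pattern A ...', 'pattern B ...', 'pattern C ...'],
tokenLimit: 32_000,
});
// Each returned request repeats the same system/input and carries a subset
// of the texts whose combined token count fits within the limit.
}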

View file

@ -0,0 +1,12 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
export function formatEntity(entity: Record<string, string>) {
return Object.entries(entity)
.map(([field, value]) => `${field}:${value}`)
.join('/');
}
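// Example (illustrative):
// formatEntity({ 'service.name': 'opbeans-java', 'service.environment': 'production' })
// returns 'service.name:opbeans-java/service.environment:production'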

View file

@ -0,0 +1,22 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { MessageRole } from '@kbn/inference-common';
import { RCA_INVESTIGATE_ENTITY_TOOL_NAME } from '@kbn/observability-ai-common/root_cause_analysis';
import { InvestigateEntityToolMessage, RootCauseAnalysisContext } from '../types';
export function getPreviouslyInvestigatedEntities({
events,
}: Pick<RootCauseAnalysisContext, 'events'>) {
const investigationToolResponses = events.filter(
(event): event is InvestigateEntityToolMessage => {
return event.role === MessageRole.Tool && event.name === RCA_INVESTIGATE_ENTITY_TOOL_NAME;
}
);
return investigationToolResponses.map((event) => event.response.entity);
}

View file

@ -0,0 +1,34 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { ScoredKnowledgeBaseEntry } from '../tasks/get_knowledge_base_entries';
import { toBlockquote } from './to_blockquote';
export function serializeKnowledgeBaseEntries(entries: ScoredKnowledgeBaseEntry[]) {
if (!entries.length) {
return `## Knowledge base
No relevant knowledge base entries were found.
`;
}
const serializedEntries = entries
.filter((entry) => entry.score >= 3)
.map(
(entry) => `## Entry \`${entry.id}\` (score: ${entry.score}, ${
entry.truncated ? `truncated` : `not truncated`
})
${toBlockquote(entry.text)}`
);
return `## Knowledge base
The following relevant entries were found in the knowledge base:
${serializedEntries.join('\n\n')}`;
}

View file

@ -0,0 +1,47 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { compact } from 'lodash';
import { MessageRole } from '@kbn/inference-common';
import { RCA_OBSERVE_TOOL_NAME } from '@kbn/observability-ai-common/root_cause_analysis';
import { formatEntity } from './format_entity';
import { toBlockquote } from './to_blockquote';
import { ObservationToolMessage, RootCauseAnalysisContext } from '../types';
export function stringifySummaries({ events }: RootCauseAnalysisContext): string {
const summaries = events
.filter((event): event is ObservationToolMessage => {
return event.role === MessageRole.Tool && event.name === RCA_OBSERVE_TOOL_NAME;
})
.map((event) => event.data);
if (!summaries.length) {
return `# Previous observations
No previous observations`;
}
return `# Previous observations
${summaries
.map((summary, index) => {
const header = `## Observation #${index + 1}`;
const entitiesHeader = summary.investigations.length
? `### Investigated entities
${summary.investigations
.map((investigation) => `- ${formatEntity(investigation.entity)}`)
.join('\n')}`
: undefined;
const summaryBody = `### Summary
${toBlockquote(summary.content)}`;
return compact([header, entitiesHeader, summaryBody]).join('\n\n');
})
.join('\n\n')}`;
}

View file

@ -0,0 +1,13 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
export function toBlockquote(input: string): string {
return input
.split('\n')
.map((line) => `> ${line}`)
.join('\n');
}
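// Example (illustrative): toBlockquote('line one\nline two') returns
// '> line one\n> line two', i.e. a markdown blockquote.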

View file

@ -0,0 +1,124 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { MessageRole, ToolCallsOf } from '@kbn/inference-common';
import { entityQuery } from '@kbn/observability-utils-common/es/queries/entity_query';
import { RCA_INVESTIGATE_ENTITY_TOOL_NAME } from '@kbn/observability-ai-common/root_cause_analysis';
import { isEqual } from 'lodash';
import { getEntitiesByFuzzySearch } from '@kbn/observability-utils-server/entities/get_entities_by_fuzzy_search';
import { RCA_TOOLS } from '../tools';
import {
InvestigateEntityToolMessage,
RootCauseAnalysisContext,
RootCauseAnalysisToolRequest,
} from '../types';
import { formatEntity } from './format_entity';
interface EntityExistsResultExists {
exists: true;
entity: Record<string, string>;
}
interface EntityExistsResultDoesNotExist {
exists: false;
entity: Record<string, string>;
suggestions: string[];
}
type EntityExistsResult = EntityExistsResultExists | EntityExistsResultDoesNotExist;
export async function validateInvestigateEntityToolCalls({
rcaContext,
toolCalls,
}: {
rcaContext: Pick<RootCauseAnalysisContext, 'esClient' | 'indices' | 'start' | 'end' | 'events'>;
toolCalls: RootCauseAnalysisToolRequest[];
}) {
const { events, esClient, indices, start, end } = rcaContext;
const previouslyInvestigatedEntities = events
.filter(
(event): event is InvestigateEntityToolMessage =>
event.role === MessageRole.Tool && event.name === RCA_INVESTIGATE_ENTITY_TOOL_NAME
)
.map((toolResponse) => toolResponse.response.entity);
const investigateEntityToolCalls = toolCalls.filter(
(
toolCall
): toolCall is ToolCallsOf<{
tools: Pick<typeof RCA_TOOLS, typeof RCA_INVESTIGATE_ENTITY_TOOL_NAME>;
}>['toolCalls'][number] => toolCall.function.name === RCA_INVESTIGATE_ENTITY_TOOL_NAME
);
if (!investigateEntityToolCalls.length) {
return [];
}
const entitiesToInvestigate = investigateEntityToolCalls.map((toolCall) => {
const { entity: entityToInvestigate } = toolCall.function.arguments;
return {
[entityToInvestigate.field]: entityToInvestigate.value,
};
});
const entityExistsResponses: EntityExistsResult[] = await Promise.all(
entitiesToInvestigate.map(async (entity) => {
const response = await esClient.search('find_data_for_entity', {
track_total_hits: 1,
size: 0,
timeout: '1ms',
index: indices.logs.concat(indices.traces),
query: {
bool: {
filter: [...entityQuery(entity)],
},
},
});
const exists = response.hits.total.value > 0;
if (!exists) {
return getEntitiesByFuzzySearch({
start,
end,
esClient,
index: indices.logs.concat(indices.traces),
entity,
}).then((suggestions) => {
return {
entity,
exists,
suggestions,
};
});
}
return { entity, exists };
})
);
const alreadyInvestigatedEntities = entitiesToInvestigate.filter((entity) => {
return previouslyInvestigatedEntities.some((prevEntity) => isEqual(entity, prevEntity));
});
const errors = [
...entityExistsResponses
.filter(
(entityExistsResult): entityExistsResult is EntityExistsResultDoesNotExist =>
!entityExistsResult.exists
)
.map(({ suggestions, entity }) => {
return `Entity ${formatEntity(
entity
)} does not exist. Did you mean one of ${suggestions.join(', ')}?`;
}),
...alreadyInvestigatedEntities.map((entity) => {
return `Entity ${formatEntity(entity)} was already investigated before.`;
}),
];
return errors;
}
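// Example return value (illustrative), fed back to the LLM as tool errors:
// [
//   'Entity service.name:opbeans-jva does not exist. Did you mean one of opbeans-java, opbeans-node?',
//   'Entity service.name:opbeans-go was already investigated before.',
// ]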

View file

@ -0,0 +1,29 @@
{
"extends": "../../../../../tsconfig.base.json",
"compilerOptions": {
"outDir": "target/types",
"types": [
"jest",
"node",
"react"
]
},
"include": [
"**/*.ts"
],
"exclude": [
"target/**/*"
],
"kbn_references": [
"@kbn/observability-utils-common",
"@kbn/alerting-plugin",
"@kbn/rule-registry-plugin",
"@kbn/inference-plugin",
"@kbn/logging",
"@kbn/calculate-auto",
"@kbn/observability-ai-assistant-plugin",
"@kbn/inference-common",
"@kbn/observability-ai-common",
"@kbn/observability-utils-server",
]
}

View file

@ -0,0 +1,50 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { castArray, orderBy } from 'lodash';
import Fuse from 'fuse.js';
import { ObservabilityElasticsearchClient } from '../es/client/create_observability_es_client';
export async function getEntitiesByFuzzySearch({
esClient,
entity,
start,
end,
index,
}: {
esClient: ObservabilityElasticsearchClient;
entity: Record<string, string>;
start: number;
end: number;
index: string | string[];
}): Promise<string[]> {
if (Object.keys(entity).length > 1) {
return [];
}
const [field, value] = Object.entries(entity)[0];
const { terms } = await esClient.client.termsEnum({
index: castArray(index).join(','),
field,
index_filter: {
range: {
'@timestamp': {
gte: new Date(start).toISOString(),
lte: new Date(end).toISOString(),
},
},
},
size: 10_000,
});
const results = new Fuse(terms, { includeScore: true, threshold: 0.75 }).search(value);
return orderBy(results, (result) => result.score, 'asc')
.slice(0, 5)
.map((result) => result.item);
}
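// Illustrative usage (not part of the original file): suggest close matches
// when an entity cannot be found. The entity, time range, and indices below
// are hypothetical.
function exampleFuzzySearch(esClient: ObservabilityElasticsearchClient) {
return getEntitiesByFuzzySearch({
esClient,
entity: { 'service.name': 'opbeans-jva' }, // note the typo
start: Date.now() - 60 * 60 * 1000,
end: Date.now(),
index: ['logs-*', 'traces-*'],
});
// resolves to up to five close matches, e.g. ['opbeans-java', ...]
}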

View file

@ -0,0 +1,405 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import {
AggregationsCategorizeTextAggregation,
AggregationsDateHistogramAggregation,
AggregationsMaxAggregation,
AggregationsMinAggregation,
AggregationsTopHitsAggregation,
QueryDslQueryContainer,
} from '@elastic/elasticsearch/lib/api/types';
import { categorizationAnalyzer } from '@kbn/aiops-log-pattern-analysis/categorization_analyzer';
import { ChangePointType } from '@kbn/es-types/src';
import { pValueToLabel } from '@kbn/observability-utils-common/ml/p_value_to_label';
import { calculateAuto } from '@kbn/calculate-auto';
import { omit, orderBy, uniqBy } from 'lodash';
import moment from 'moment';
import { ObservabilityElasticsearchClient } from '../es/client/create_observability_es_client';
import { kqlQuery } from '../es/queries/kql_query';
import { rangeQuery } from '../es/queries/range_query';
interface FieldPatternResultBase {
field: string;
count: number;
pattern: string;
regex: string;
sample: string;
firstOccurrence: string;
lastOccurrence: string;
highlight: Record<string, string[]>;
metadata: Record<string, unknown[]>;
}
interface FieldPatternResultChanges {
timeseries: Array<{ x: number; y: number }>;
change: {
timestamp?: string;
significance: 'high' | 'medium' | 'low' | null;
type: ChangePointType;
change_point?: number;
p_value?: number;
};
}
export type FieldPatternResult<TChanges extends boolean | undefined = undefined> =
FieldPatternResultBase & (TChanges extends true ? FieldPatternResultChanges : {});
export type FieldPatternResultWithChanges = FieldPatternResult<true>;
interface CategorizeTextOptions {
query: QueryDslQueryContainer;
metadata: string[];
esClient: ObservabilityElasticsearchClient;
samplingProbability: number;
fields: string[];
index: string | string[];
useMlStandardTokenizer: boolean;
size: number;
start: number;
end: number;
}
// eslint-disable-next-line @typescript-eslint/consistent-type-definitions
type CategorizeTextSubAggregations = {
sample: { top_hits: AggregationsTopHitsAggregation };
minTimestamp: { min: AggregationsMinAggregation };
maxTimestamp: { max: AggregationsMaxAggregation };
};
interface CategorizeTextAggregationResult {
categorize_text: AggregationsCategorizeTextAggregation;
aggs: CategorizeTextSubAggregations &
(
| {}
| {
timeseries: { date_histogram: AggregationsDateHistogramAggregation };
changes: { change_point: { buckets_path: string } };
}
);
}
export async function runCategorizeTextAggregation<
TChanges extends boolean | undefined = undefined
>(
options: CategorizeTextOptions & { includeChanges?: TChanges }
): Promise<Array<FieldPatternResult<TChanges>>>;
export async function runCategorizeTextAggregation({
esClient,
fields,
metadata,
index,
query,
samplingProbability,
useMlStandardTokenizer,
includeChanges,
size,
start,
end,
}: CategorizeTextOptions & { includeChanges?: boolean }): Promise<
Array<FieldPatternResult<boolean>>
> {
const aggs = Object.fromEntries(
fields.map((field): [string, CategorizeTextAggregationResult] => [
field,
{
categorize_text: {
field,
min_doc_count: 1,
size,
categorization_analyzer: useMlStandardTokenizer
? {
tokenizer: 'ml_standard',
char_filter: [
{
type: 'pattern_replace',
pattern: '\\\\n',
replacement: '',
} as unknown as string,
],
}
: categorizationAnalyzer,
},
aggs: {
minTimestamp: {
min: {
field: '@timestamp',
},
},
maxTimestamp: {
max: {
field: '@timestamp',
},
},
...(includeChanges
? {
timeseries: {
date_histogram: {
field: '@timestamp',
min_doc_count: 0,
extended_bounds: {
min: start,
max: end,
},
fixed_interval: `${calculateAuto
.atLeast(30, moment.duration(end - start, 'ms'))!
.asMilliseconds()}ms`,
},
},
changes: {
change_point: {
buckets_path: 'timeseries>_count',
},
},
}
: {}),
sample: {
top_hits: {
size: 1,
_source: false,
fields: [field, ...metadata],
sort: {
_score: {
order: 'desc',
},
},
highlight: {
fields: {
'*': {},
},
},
},
},
},
},
])
);
const response = await esClient.search('get_log_patterns', {
index,
size: 0,
track_total_hits: false,
query: {
bool: {
filter: [query, ...rangeQuery(start, end)],
},
},
aggregations: {
sampler: {
random_sampler: {
probability: samplingProbability,
},
aggs,
},
},
});
if (!response.aggregations) {
return [];
}
const fieldAggregates = omit(response.aggregations.sampler, 'seed', 'doc_count', 'probability');
return Object.entries(fieldAggregates).flatMap(([fieldName, aggregate]) => {
const buckets = aggregate.buckets;
return buckets.map((bucket) => {
return {
field: fieldName,
count: bucket.doc_count,
pattern: bucket.key,
regex: bucket.regex,
sample: bucket.sample.hits.hits[0].fields![fieldName][0] as string,
highlight: bucket.sample.hits.hits[0].highlight ?? {},
metadata: bucket.sample.hits.hits[0].fields!,
firstOccurrence: new Date(bucket.minTimestamp.value!).toISOString(),
lastOccurrence: new Date(bucket.maxTimestamp.value!).toISOString(),
...('timeseries' in bucket
? {
timeseries: bucket.timeseries.buckets.map((dateBucket) => ({
x: dateBucket.key,
y: dateBucket.doc_count,
})),
change: Object.entries(bucket.changes.type).map(
([changePointType, change]): FieldPatternResultChanges['change'] => {
return {
type: changePointType as ChangePointType,
significance:
change.p_value !== undefined ? pValueToLabel(change.p_value) : null,
change_point: change.change_point,
p_value: change.p_value,
timestamp:
change.change_point !== undefined
? bucket.timeseries.buckets[change.change_point].key_as_string
: undefined,
};
}
)[0],
}
: {}),
};
});
});
}
interface LogPatternOptions {
esClient: ObservabilityElasticsearchClient;
start: number;
end: number;
index: string | string[];
kuery: string;
metadata?: string[];
fields: string[];
}
export async function getLogPatterns<TChanges extends boolean | undefined = undefined>(
options: LogPatternOptions & { includeChanges?: TChanges }
): Promise<Array<FieldPatternResult<TChanges>>>;
export async function getLogPatterns({
esClient,
start,
end,
index,
kuery,
includeChanges,
metadata = [],
fields,
}: LogPatternOptions & { includeChanges?: boolean }): Promise<Array<FieldPatternResult<boolean>>> {
const fieldCapsResponse = await esClient.fieldCaps('get_field_caps_for_log_pattern_analysis', {
fields,
index_filter: {
bool: {
filter: [...rangeQuery(start, end)],
},
},
index,
types: ['text', 'match_only_text'],
});
const fieldsInFieldCaps = Object.keys(fieldCapsResponse.fields);
if (!fieldsInFieldCaps.length) {
return [];
}
const totalDocsResponse = await esClient.search('get_total_docs_for_log_pattern_analysis', {
index,
size: 0,
track_total_hits: true,
query: {
bool: {
filter: [...kqlQuery(kuery), ...rangeQuery(start, end)],
},
},
});
const totalHits = totalDocsResponse.hits.total.value;
if (totalHits === 0) {
return [];
}
let samplingProbability = 100_000 / totalHits;
if (samplingProbability >= 0.5) {
samplingProbability = 1;
}
const fieldGroups = includeChanges
? fieldsInFieldCaps.map((field) => [field])
: [fieldsInFieldCaps];
const allPatterns = await Promise.all(
fieldGroups.map(async (fieldGroup) => {
const topMessagePatterns = await runCategorizeTextAggregation({
esClient,
index,
fields: fieldGroup,
query: {
bool: {
filter: kqlQuery(kuery),
},
},
samplingProbability,
useMlStandardTokenizer: false,
size: 100,
start,
end,
includeChanges,
metadata,
});
if (topMessagePatterns.length === 0) {
return [];
}
const patternsToExclude = topMessagePatterns.filter((pattern) => {
// Elasticsearch will reject the exclusion query if it is too complex. The
// number of capture groups in the pattern's regex serves as a proxy for complexity.
const complexity = pattern.regex.match(/(\.\+\?)|(\.\*\?)/g)?.length ?? 0;
return (
complexity <= 25 &&
// patterns with 50 or fewer messages are re-processed with the ml_standard tokenizer below
pattern.count > 50
);
});
const rareMessagePatterns = await runCategorizeTextAggregation({
esClient,
index,
fields: fieldGroup,
start,
end,
query: {
bool: {
filter: kqlQuery(kuery),
must_not: [
...patternsToExclude.map((pattern) => {
return {
bool: {
filter: [
{
regexp: {
[pattern.field]: {
value: pattern.regex,
},
},
},
{
match: {
[pattern.field]: {
query: pattern.pattern,
fuzziness: 0,
operator: 'and' as const,
auto_generate_synonyms_phrase_query: false,
},
},
},
],
},
};
}),
],
},
},
size: 1000,
includeChanges,
samplingProbability: 1,
useMlStandardTokenizer: true,
metadata,
});
return [...patternsToExclude, ...rareMessagePatterns];
})
);
return uniqBy(
orderBy(allPatterns.flat(), (pattern) => pattern.count, 'desc'),
(pattern) => pattern.sample
);
}
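// Illustrative usage (not part of the original file): surface message
// patterns, including change points, for one service's error logs. The index
// pattern, kuery, and fields are hypothetical.
function exampleGetLogPatterns(
esClient: ObservabilityElasticsearchClient,
start: number,
end: number
) {
return getLogPatterns({
esClient,
index: ['logs-*'],
start,
end,
kuery: 'service.name:"opbeans-java" AND log.level:"error"',
fields: ['message'],
metadata: ['service.name'],
includeChanges: true,
});
}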

View file

@ -24,6 +24,8 @@
"@kbn/alerting-plugin",
"@kbn/rule-registry-plugin",
"@kbn/rule-data-utils",
"@kbn/aiops-log-pattern-analysis",
"@kbn/calculate-auto",
"@kbn/utility-types",
"@kbn/task-manager-plugin",
]

View file

@ -13,6 +13,9 @@ export {
type AssistantMessage,
type ToolMessage,
type UserMessage,
type MessageOf,
type AssistantMessageOf,
type ToolMessageOf,
type ToolSchemaType,
type FromToolSchema,
type ToolSchema,

View file

@ -33,6 +33,9 @@ export {
type AssistantMessage,
type UserMessage,
type ToolMessage,
type AssistantMessageOf,
type MessageOf,
type ToolMessageOf,
} from './messages';
export { type ToolSchema, type ToolSchemaType, type FromToolSchema } from './tool_schema';
export {

View file

@ -5,7 +5,7 @@
* 2.0.
*/
import type { ToolCall } from './tools';
import type { ToolCall, ToolCallsOf, ToolNamesOf, ToolOptions, ToolResponsesOf } from './tools';
/**
* Enum for all possible {@link Message} roles.
@ -52,17 +52,32 @@ export type AssistantMessage = MessageBase<MessageRole.Assistant> & {
/**
* Represents a tool invocation result, following a request from the LLM to execute a tool.
*/
export type ToolMessage<TToolResponse extends Record<string, any> | unknown> =
MessageBase<MessageRole.Tool> & {
/**
* The call id matching the {@link ToolCall} this tool message is for.
*/
toolCallId: string;
/**
* The response from the tool invocation.
*/
response: TToolResponse;
};
export type ToolMessage<
TName extends string = string,
TToolResponse extends Record<string, any> | unknown = Record<string, any> | unknown,
TToolData extends Record<string, any> | undefined = Record<string, any> | undefined
> = MessageBase<MessageRole.Tool> & {
/*
* The name of the tool called. Used for refining the type of the response.
*/
name: TName;
/**
* The call id matching the {@link ToolCall} this tool message is for.
*/
toolCallId: string;
/**
* The response from the tool invocation.
*/
response: TToolResponse;
} & (TToolData extends undefined
? {}
: {
/**
* Additional data from the tool invocation, that is not sent to the LLM
* but can be used to attach baggage (such as timeseries or debug data)
*/
data: TToolData;
});
/**
* Mixin composed of all the possible types of messages in a chatComplete discussion.
@ -72,4 +87,30 @@ export type ToolMessage<TToolResponse extends Record<string, any> | unknown> =
* - {@link AssistantMessage}
* - {@link ToolMessage}
*/
export type Message = UserMessage | AssistantMessage | ToolMessage<unknown>;
export type Message = UserMessage | AssistantMessage | ToolMessage;
/**
* Utility type to get the Assistant message type of a {@link ToolOptions} type.
*/
export type AssistantMessageOf<TToolOptions extends ToolOptions> = Omit<
AssistantMessage,
'toolCalls'
> &
ToolCallsOf<TToolOptions>;
/**
* Utility type to get the Tool message type of a {@link ToolOptions} type.
*/
export type ToolMessageOf<TToolOptions extends ToolOptions> = ToolMessage<
ToolNamesOf<TToolOptions>,
ToolResponsesOf<TToolOptions['tools']>
>;
/**
* Utility type to get the mixin Message type of a {@link ToolOptions} type.
*/
export type MessageOf<TToolOptions extends ToolOptions> =
| UserMessage
| AssistantMessageOf<TToolOptions>
| ToolMessageOf<TToolOptions>;
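// Hypothetical illustration: given a single tool named 'lookup' whose schema
// declares a required string property 'serviceName',
// AssistantMessageOf<{ tools: { lookup: typeof lookupTool } }> narrows the
// assistant message's toolCalls to:
// [{ toolCallId: string, function: { name: 'lookup', arguments: { serviceName: string } } }]
// and MessageOf unions that with the matching user and tool message types.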

View file

@ -8,24 +8,24 @@
import type { ValuesType } from 'utility-types';
import { FromToolSchema, ToolSchema } from './tool_schema';
type Assert<TValue, TType> = TValue extends TType ? TValue & TType : never;
type ToolsOfChoice<TToolOptions extends ToolOptions> = TToolOptions['toolChoice'] extends {
function: infer TToolName;
}
? TToolName extends keyof TToolOptions['tools']
? Pick<TToolOptions['tools'], TToolName>
? TToolName extends string
? Pick<TToolOptions['tools'], TToolName>
: TToolOptions['tools']
: TToolOptions['tools']
: TToolOptions['tools'];
/**
* Utility type to infer the tool calls response shape.
*/
type ToolResponsesOf<TTools extends Record<string, ToolDefinition> | undefined> =
export type ToolResponsesOf<TTools extends Record<string, ToolDefinition> | undefined> =
TTools extends Record<string, ToolDefinition>
? Array<
ValuesType<{
[TName in keyof TTools]: ToolResponseOf<Assert<TName, string>, TTools[TName]>;
[TName in keyof TTools & string]: ToolCall<TName, ToolResponseOf<TTools[TName]>>;
}>
>
: never[];
@ -33,10 +33,11 @@ type ToolResponsesOf<TTools extends Record<string, ToolDefinition> | undefined>
/**
* Utility type to infer the tool call response shape.
*/
type ToolResponseOf<TName extends string, TToolDefinition extends ToolDefinition> = ToolCall<
TName,
TToolDefinition extends { schema: ToolSchema } ? FromToolSchema<TToolDefinition['schema']> : {}
>;
export type ToolResponseOf<TToolDefinition extends ToolDefinition> = TToolDefinition extends {
schema: ToolSchema;
}
? FromToolSchema<TToolDefinition['schema']>
: {};
/**
* Tool invocation choice type.
@ -129,6 +130,10 @@ export interface ToolCall<
name: TName;
} & (TArguments extends Record<string, any> ? { arguments: TArguments } : {});
}
/**
* Utility type to get the tool names of ToolOptions
*/
export type ToolNamesOf<TToolOptions extends ToolOptions> = keyof TToolOptions['tools'] & string;
/**
* Tool-related parameters of {@link ChatCompleteAPI}

View file

@ -96,6 +96,17 @@ export interface OutputOptions<
* Defaults to false.
*/
stream?: TStream;
/**
* Optional configuration for retrying the call if an error occurs.
*/
retry?: {
/**
* Whether to retry on validation errors. Can be a number of retries,
* or a boolean, where true means one retry.
*/
onValidationError?: boolean | number;
};
}
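// Illustrative, non-streaming usage of the retry option (the connector id and
// schema are hypothetical):
// const result = await inferenceClient.output({
//   id: 'extract_service_name',
//   connectorId: 'my-connector',
//   input: 'Extract the service name from: "opbeans-java reported 502s"',
//   schema: {
//     type: 'object',
//     properties: { serviceName: { type: 'string' } },
//     required: ['serviceName'],
//   },
//   retry: { onValidationError: 1 }, // retry once if tool-call arguments fail validation
// });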
/**

View file

@ -3,4 +3,4 @@
"private": true,
"version": "1.0.0",
"license": "Elastic License 2.0"
}
}

View file

@ -0,0 +1,72 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import type { AggregationsCustomCategorizeTextAnalyzer } from '@elastic/elasticsearch/lib/api/types';
// This is a copy of the default categorization analyzer but using the 'standard' tokenizer rather than the 'ml_standard' tokenizer.
// The 'ml_standard' tokenizer splits tokens in a way that was observed to give better categories in testing many years ago, however,
// the downside of these better categories is then a potential failure to match the original documents when creating a filter for Discover.
// A future enhancement would be to check which analyzer is specified in the mappings for the source field and to use
// that instead of unconditionally using 'standard'.
// However for an initial fix, using the standard analyzer will be more likely to match the results from the majority of searches.
export const categorizationAnalyzer: AggregationsCustomCategorizeTextAnalyzer = {
char_filter: ['first_line_with_letters'],
tokenizer: 'standard',
filter: [
// @ts-expect-error filter type in AggregationsCustomCategorizeTextAnalyzer is incorrect
{
type: 'stop',
stopwords: [
'Monday',
'Tuesday',
'Wednesday',
'Thursday',
'Friday',
'Saturday',
'Sunday',
'Mon',
'Tue',
'Wed',
'Thu',
'Fri',
'Sat',
'Sun',
'January',
'February',
'March',
'April',
'May',
'June',
'July',
'August',
'September',
'October',
'November',
'December',
'Jan',
'Feb',
'Mar',
'Apr',
'May',
'Jun',
'Jul',
'Aug',
'Sep',
'Oct',
'Nov',
'Dec',
'GMT',
'UTC',
],
},
// @ts-expect-error filter type in AggregationsCustomCategorizeTextAnalyzer is incorrect
{
type: 'limit',
max_token_count: '100',
},
],
};

View file

@ -5,16 +5,14 @@
* 2.0.
*/
import type {
QueryDslQueryContainer,
AggregationsCustomCategorizeTextAnalyzer,
} from '@elastic/elasticsearch/lib/api/types';
import type { QueryDslQueryContainer } from '@elastic/elasticsearch/lib/api/types';
import type { MappingRuntimeFields } from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import { isPopulatedObject } from '@kbn/ml-is-populated-object/src/is_populated_object';
import type { createRandomSamplerWrapper } from '@kbn/ml-random-sampler-utils';
import { createDefaultQuery } from '@kbn/aiops-common/create_default_query';
import { categorizationAnalyzer } from './categorization_analyzer';
const CATEGORY_LIMIT = 1000;
const EXAMPLE_LIMIT = 4;
@ -121,67 +119,3 @@ export function createCategoryRequest(
},
};
}
// This is a copy of the default categorization analyzer but using the 'standard' tokenizer rather than the 'ml_standard' tokenizer.
// The 'ml_standard' tokenizer splits tokens in a way that was observed to give better categories in testing many years ago, however,
// the downside of these better categories is then a potential failure to match the original documents when creating a filter for Discover.
// A future enhancement would be to check which analyzer is specified in the mappings for the source field and to use
// that instead of unconditionally using 'standard'.
// However for an initial fix, using the standard analyzer will be more likely to match the results from the majority of searches.
const categorizationAnalyzer: AggregationsCustomCategorizeTextAnalyzer = {
char_filter: ['first_line_with_letters'],
tokenizer: 'standard',
filter: [
// @ts-expect-error filter type in AggregationsCustomCategorizeTextAnalyzer is incorrect
{
type: 'stop',
stopwords: [
'Monday',
'Tuesday',
'Wednesday',
'Thursday',
'Friday',
'Saturday',
'Sunday',
'Mon',
'Tue',
'Wed',
'Thu',
'Fri',
'Sat',
'Sun',
'January',
'February',
'March',
'April',
'May',
'June',
'July',
'August',
'September',
'October',
'November',
'December',
'Jan',
'Feb',
'Mar',
'Apr',
'May',
'Jun',
'Jul',
'Aug',
'Sep',
'Oct',
'Nov',
'Dec',
'GMT',
'UTC',
],
},
// @ts-expect-error filter type in AggregationsCustomCategorizeTextAnalyzer is incorrect
{
type: 'limit',
max_token_count: '100',
},
],
};

View file

@ -26099,7 +26099,6 @@
"xpack.investigateApp.addObservationUI.h2.addAnObservationChartLabel": "Ajouter un graphique d'observation",
"xpack.investigateApp.addObservationUI.p.selectADataSourceLabel": "Sélectionnez une source de données pour générer un graphique d'aperçu",
"xpack.investigateApp.appTitle": "Investigations",
"xpack.investigateApp.assistantHypothesis.observabilityAIAssistantContextualInsight.helpMeInvestigateThisLabel": "Aidez-moi à résoudre la cause de cet échec",
"xpack.investigateApp.defaultChart.error_equation.description": "Vérifiez l'équation.",
"xpack.investigateApp.defaultChart.error_equation.title": "Une erreur s'est produite lors de l'affichage du graphique",
"xpack.investigateApp.defaultChart.noData.title": "Aucune donnée graphique disponible",

View file

@ -25957,7 +25957,6 @@
"xpack.investigateApp.addObservationUI.h2.addAnObservationChartLabel": "観測グラフを追加",
"xpack.investigateApp.addObservationUI.p.selectADataSourceLabel": "データソースを選択して、プレビューグラフを生成",
"xpack.investigateApp.appTitle": "調査",
"xpack.investigateApp.assistantHypothesis.observabilityAIAssistantContextualInsight.helpMeInvestigateThisLabel": "このエラーの調査を支援",
"xpack.investigateApp.defaultChart.error_equation.description": "式を確認してください。",
"xpack.investigateApp.defaultChart.error_equation.title": "グラフの表示中にエラーが発生しました",
"xpack.investigateApp.defaultChart.noData.title": "グラフデータがありません",

View file

@ -26040,7 +26040,6 @@
"xpack.investigateApp.addObservationUI.h2.addAnObservationChartLabel": "添加观察图表",
"xpack.investigateApp.addObservationUI.p.selectADataSourceLabel": "选择数据源以生成预览图表",
"xpack.investigateApp.appTitle": "调查",
"xpack.investigateApp.assistantHypothesis.observabilityAIAssistantContextualInsight.helpMeInvestigateThisLabel": "帮助我调查此故障",
"xpack.investigateApp.defaultChart.error_equation.description": "检查方程。",
"xpack.investigateApp.defaultChart.error_equation.title": "渲染图表时出错",
"xpack.investigateApp.defaultChart.noData.title": "没有可用图表数据",

View file

@ -12,6 +12,7 @@ import {
ChatCompletionEventType,
} from '@kbn/inference-common';
import { createOutputApi } from './create_output_api';
import { createToolValidationError } from '../../server/chat_complete/errors';
describe('createOutputApi', () => {
let chatComplete: jest.Mock;
@ -119,4 +120,80 @@ describe('createOutputApi', () => {
},
]);
});
describe('when using retry', () => {
const unvalidatedFailedToolCall = {
function: {
name: 'myFunction',
arguments: JSON.stringify({ foo: 'bar' }),
},
toolCallId: 'foo',
};
const validationError = createToolValidationError('Validation failed', {
toolCalls: [unvalidatedFailedToolCall],
});
it('retries once when onValidationError is a boolean', async () => {
chatComplete.mockRejectedValueOnce(validationError);
chatComplete.mockResolvedValueOnce(
Promise.resolve({ content: 'retried content', toolCalls: [unvalidatedFailedToolCall] })
);
const output = createOutputApi(chatComplete);
const response = await output({
id: 'retry-id',
stream: false,
connectorId: '.retry-connector',
input: 'input message',
retry: {
onValidationError: true,
},
});
expect(chatComplete).toHaveBeenCalledTimes(2);
expect(response).toEqual({
id: 'retry-id',
content: 'retried content',
output: unvalidatedFailedToolCall.function.arguments,
});
});
it('retries the number of specified attempts', async () => {
chatComplete.mockRejectedValue(validationError);
const output = createOutputApi(chatComplete);
await expect(
output({
id: 'retry-id',
stream: false,
connectorId: '.retry-connector',
input: 'input message',
retry: {
onValidationError: 2,
},
})
).rejects.toThrow('Validation failed');
expect(chatComplete).toHaveBeenCalledTimes(3);
});
it('throws an error if retry is provided in streaming mode', () => {
const output = createOutputApi(chatComplete);
expect(() =>
output({
id: 'stream-retry-id',
stream: true,
connectorId: '.stream-retry-connector',
input: 'input message',
retry: {
onValidationError: 1,
},
})
).toThrowError('Retry options are not supported in streaming mode');
});
});
});

View file

@ -10,17 +10,22 @@ import {
ChatCompletionEventType,
MessageRole,
OutputAPI,
OutputCompositeResponse,
OutputEventType,
OutputOptions,
ToolSchema,
isToolValidationError,
withoutTokenCountEvents,
} from '@kbn/inference-common';
import { isObservable, map } from 'rxjs';
import { ensureMultiTurn } from '../utils/ensure_multi_turn';
type DefaultOutputOptions = OutputOptions<string, ToolSchema | undefined, boolean>;
export function createOutputApi(chatCompleteApi: ChatCompleteAPI): OutputAPI;
export function createOutputApi(chatCompleteApi: ChatCompleteAPI) {
return ({
return function callOutputApi({
id,
connectorId,
input,
@ -29,19 +34,26 @@ export function createOutputApi(chatCompleteApi: ChatCompleteAPI) {
previousMessages,
functionCalling,
stream,
}: OutputOptions<string, ToolSchema | undefined, boolean>) => {
retry,
}: DefaultOutputOptions): OutputCompositeResponse<string, ToolSchema | undefined, boolean> {
if (stream && retry !== undefined) {
throw new Error(`Retry options are not supported in streaming mode`);
}
const messages = ensureMultiTurn([
...(previousMessages || []),
{
role: MessageRole.User,
content: input,
},
]);
const response = chatCompleteApi({
connectorId,
stream,
functionCalling,
system,
messages: ensureMultiTurn([
...(previousMessages || []),
{
role: MessageRole.User,
content: input,
},
]),
messages,
...(schema
? {
tools: {
@ -79,16 +91,55 @@ export function createOutputApi(chatCompleteApi: ChatCompleteAPI) {
})
);
} else {
return response.then((chatResponse) => {
return {
id,
content: chatResponse.content,
output:
chatResponse.toolCalls.length && 'arguments' in chatResponse.toolCalls[0].function
? chatResponse.toolCalls[0].function.arguments
: undefined,
};
});
return response.then(
(chatResponse) => {
return {
id,
content: chatResponse.content,
output:
chatResponse.toolCalls.length && 'arguments' in chatResponse.toolCalls[0].function
? chatResponse.toolCalls[0].function.arguments
: undefined,
};
},
(error: Error) => {
if (isToolValidationError(error) && retry?.onValidationError) {
const retriesLeft =
typeof retry.onValidationError === 'number' ? retry.onValidationError : 1;
return callOutputApi({
id,
connectorId,
input,
schema,
system,
previousMessages: messages.concat(
{
role: MessageRole.Assistant as const,
content: '',
toolCalls: error.meta.toolCalls!,
},
...(error.meta.toolCalls?.map((toolCall) => {
return {
name: toolCall.function.name,
role: MessageRole.Tool as const,
toolCallId: toolCall.toolCallId,
response: {
error: error.meta,
},
};
}) ?? [])
),
functionCalling,
stream: false,
retry: {
onValidationError: retriesLeft - 1,
},
}) as OutputCompositeResponse<string, ToolSchema | undefined, false>;
}
throw error;
}
);
}
};
}

View file

@ -170,6 +170,7 @@ describe('bedrockClaudeAdapter', () => {
],
},
{
name: 'my_function',
role: MessageRole.Tool,
toolCallId: '0',
response: {

View file

@ -172,6 +172,7 @@ describe('geminiAdapter', () => {
],
},
{
name: 'my_function',
role: MessageRole.Tool,
toolCallId: '0',
response: {

View file

@ -142,6 +142,7 @@ describe('openAIAdapter', () => {
],
},
{
name: 'my_function',
role: MessageRole.Tool,
toolCallId: '0',
response: {

View file

@@ -58,7 +58,6 @@ export const openAIAdapter: InferenceConnectorAdapter = {
request = {
stream,
messages: messagesToOpenAI({ system: wrapped.system, messages: wrapped.messages }),
temperature: 0,
};
} else {
request = {
@@ -66,7 +65,6 @@
messages: messagesToOpenAI({ system, messages }),
tool_choice: toolChoiceToOpenAI(toolChoice),
tools: toolsToOpenAI(tools),
temperature: 0,
};
}

View file

@@ -5,7 +5,7 @@
* 2.0.
*/
import { last } from 'lodash';
import { last, omit } from 'lodash';
import { defer, switchMap, throwError } from 'rxjs';
import type { Logger } from '@kbn/logging';
import type { KibanaRequest } from '@kbn/core-http-server';
@@ -51,14 +51,26 @@ export function createChatCompleteApi({ request, actions, logger }: CreateChatCo
const connectorType = connector.type;
const inferenceAdapter = getInferenceAdapter(connectorType);
const messagesWithoutData = messages.map((message) => omit(message, 'data'));
if (!inferenceAdapter) {
return throwError(() =>
createInferenceRequestError(`Adapter for type ${connectorType} not implemented`, 400)
);
}
logger.debug(() => `Sending request: ${JSON.stringify(last(messages))}`);
logger.trace(() => JSON.stringify({ messages, toolChoice, tools, system }));
logger.debug(
() => `Sending request, last message is: ${JSON.stringify(last(messagesWithoutData))}`
);
logger.trace(() =>
JSON.stringify({
messages: messagesWithoutData,
toolChoice,
tools,
system,
})
);
return inferenceAdapter.chatComplete({
system,
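The logging change in this hunk is narrow but useful: tool responses can carry a large `data` payload, so it is stripped from every message before the messages are serialized into debug and trace logs. A reduced sketch of the idea, with a simplified `Message` shape standing in for the real type:

```ts
import { omit } from 'lodash';

interface Message {
  role: string;
  content?: string;
  // Potentially large attachment that should never end up in logs.
  data?: Record<string, unknown>;
}

// Strip `data` from each message before serializing for log output.
function toLoggableMessages(messages: Message[]) {
  return messages.map((message) => omit(message, 'data'));
}
```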

View file

@@ -44,7 +44,7 @@ export function createToolValidationError(
name?: string;
arguments?: string;
errorsText?: string;
toolCalls?: UnvalidatedToolCall[];
toolCalls: UnvalidatedToolCall[];
}
): ChatCompletionToolValidationError {
return new InferenceTaskError(ChatCompletionErrorCode.ToolValidationError, message, meta);

View file

@@ -79,7 +79,7 @@ export function wrapWithSimulatedFunctionCalling({
};
}
const convertToolResponseMessage = (message: ToolMessage<unknown>): UserMessage => {
const convertToolResponseMessage = (message: ToolMessage): UserMessage => {
return {
role: MessageRole.User,
content: JSON.stringify({

View file

@@ -183,7 +183,7 @@ describe('chunksIntoMessage', () => {
}
await expect(async () => getMessage()).rejects.toThrowErrorMatchingInlineSnapshot(
`"Tool call arguments for myFunction were invalid"`
`"Tool call arguments for myFunction (001) were invalid"`
);
});

View file

@@ -5,17 +5,17 @@
* 2.0.
*/
import { last, map, merge, OperatorFunction, scan, share } from 'rxjs';
import type { Logger } from '@kbn/logging';
import {
UnvalidatedToolCall,
ToolOptions,
ChatCompletionChunkEvent,
ChatCompletionEventType,
ChatCompletionMessageEvent,
ChatCompletionTokenCountEvent,
ToolOptions,
UnvalidatedToolCall,
withoutTokenCountEvents,
} from '@kbn/inference-common';
import type { Logger } from '@kbn/logging';
import { OperatorFunction, map, merge, share, toArray } from 'rxjs';
import { validateToolCalls } from '../../util/validate_tool_calls';
export function chunksIntoMessage<TToolOptions extends ToolOptions>({
@@ -37,38 +37,36 @@ export function chunksIntoMessage<TToolOptions extends ToolOptions>({
shared$,
shared$.pipe(
withoutTokenCountEvents(),
scan(
(prev, chunk) => {
prev.content += chunk.content ?? '';
toArray(),
map((chunks): ChatCompletionMessageEvent<TToolOptions> => {
const concatenatedChunk = chunks.reduce(
(prev, chunk) => {
prev.content += chunk.content ?? '';
chunk.tool_calls?.forEach((toolCall) => {
let prevToolCall = prev.tool_calls[toolCall.index];
if (!prevToolCall) {
prev.tool_calls[toolCall.index] = {
function: {
name: '',
arguments: '',
},
toolCallId: '',
};
chunk.tool_calls?.forEach((toolCall) => {
let prevToolCall = prev.tool_calls[toolCall.index];
if (!prevToolCall) {
prev.tool_calls[toolCall.index] = {
function: {
name: '',
arguments: '',
},
toolCallId: '',
};
prevToolCall = prev.tool_calls[toolCall.index];
}
prevToolCall = prev.tool_calls[toolCall.index];
}
prevToolCall.function.name += toolCall.function.name;
prevToolCall.function.arguments += toolCall.function.arguments;
prevToolCall.toolCallId += toolCall.toolCallId;
});
prevToolCall.function.name += toolCall.function.name;
prevToolCall.function.arguments += toolCall.function.arguments;
prevToolCall.toolCallId += toolCall.toolCallId;
});
return prev;
},
{ content: '', tool_calls: [] as UnvalidatedToolCall[] }
);
return prev;
},
{
content: '',
tool_calls: [] as UnvalidatedToolCall[],
}
),
last(),
map((concatenatedChunk): ChatCompletionMessageEvent<TToolOptions> => {
logger.debug(() => `Received completed message: ${JSON.stringify(concatenatedChunk)}`);
const validatedToolCalls = validateToolCalls<TToolOptions>({
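The refactor above swaps `scan` + `last` for `toArray` plus a plain reduce: all chunks are collected once the stream completes, then folded into a single message, with partial tool calls stitched together by their `index`. Here is the folding step in isolation, with simplified types (the real operator works over `ChatCompletionChunkEvent`s):

```ts
interface ChunkToolCall {
  index: number;
  toolCallId: string;
  function: { name: string; arguments: string };
}

interface Chunk {
  content?: string;
  tool_calls?: ChunkToolCall[];
}

interface ConcatenatedMessage {
  content: string;
  tool_calls: Array<{ toolCallId: string; function: { name: string; arguments: string } }>;
}

// Fold a completed stream of chunks into one message: text is appended,
// and partial tool calls are merged slot-by-slot via their `index`.
function concatenateChunks(chunks: Chunk[]): ConcatenatedMessage {
  return chunks.reduce<ConcatenatedMessage>(
    (prev, chunk) => {
      prev.content += chunk.content ?? '';
      chunk.tool_calls?.forEach((toolCall) => {
        let prevToolCall = prev.tool_calls[toolCall.index];
        if (!prevToolCall) {
          prevToolCall = prev.tool_calls[toolCall.index] = {
            function: { name: '', arguments: '' },
            toolCallId: '',
          };
        }
        prevToolCall.function.name += toolCall.function.name;
        prevToolCall.function.arguments += toolCall.function.arguments;
        prevToolCall.toolCallId += toolCall.toolCallId;
      });
      return prev;
    },
    { content: '', tool_calls: [] }
  );
}
```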

View file

@@ -76,9 +76,11 @@ const chatCompleteBodySchema: Type<ChatCompleteRequestBody> = schema.object({
name: schema.maybe(schema.string()),
}),
schema.object({
name: schema.string(),
role: schema.literal(MessageRole.Tool),
toolCallId: schema.string(),
response: schema.recordOf(schema.string(), schema.any()),
data: schema.maybe(schema.recordOf(schema.string(), schema.any())),
}),
])
),

View file

@@ -34,6 +34,7 @@ export const generateEsqlTask = <TToolOptions extends ToolOptions>({
docBase,
functionCalling,
logger,
system,
}: {
connectorId: string;
systemMessage: string;
@@ -43,6 +44,7 @@ export const generateEsqlTask = <TToolOptions extends ToolOptions>({
docBase: EsqlDocumentBase;
functionCalling?: FunctionCallingMode;
logger: Pick<Logger, 'debug'>;
system?: string;
}) => {
return function askLlmToRespond({
documentationRequest: { commands, functions },
@@ -97,7 +99,7 @@ export const generateEsqlTask = <TToolOptions extends ToolOptions>({
When converting queries from one language to ES|QL, make sure that the functions are available
and documented in ES|QL. E.g., for SPL's LEN, use LENGTH. For IF, use CASE.
`,
${system ? `## Additional instructions\n\n${system}` : ''}`,
messages: [
...messages,
{
@@ -106,6 +108,7 @@ export const generateEsqlTask = <TToolOptions extends ToolOptions>({
toolCalls: [fakeRequestDocsToolCall],
},
{
name: fakeRequestDocsToolCall.function.name,
role: MessageRole.Tool,
response: {
documentation: requestedDocumentation,

View file

@@ -21,6 +21,7 @@ export function naturalLanguageToEsql<TToolOptions extends ToolOptions>({
toolChoice,
logger,
functionCalling,
system,
...rest
}: NlToEsqlTaskParams<TToolOptions>): Observable<NlToEsqlTaskEvent<TToolOptions>> {
return from(loadDocBase()).pipe(
@@ -41,6 +42,7 @@ export function naturalLanguageToEsql<TToolOptions extends ToolOptions>({
tools,
toolChoice,
},
system,
});
return requestDocumentation({

View file

@@ -29,5 +29,6 @@ export type NlToEsqlTaskParams<TToolOptions extends ToolOptions> = {
connectorId: string;
logger: Pick<Logger, 'debug'>;
functionCalling?: FunctionCallingMode;
system?: string;
} & TToolOptions &
({ input: string } | { messages: Message[] });
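The new optional `system` parameter threaded through `naturalLanguageToEsql` and `generateEsqlTask` above is appended to the task's built-in prompt rather than replacing it, under its own heading. Conceptually, the merge is just:

```ts
// Sketch: caller-provided instructions extend (not override) the task's
// built-in ES|QL guidance, mirroring the template change in this diff.
function buildSystemPrompt(basePrompt: string, system?: string): string {
  return system ? `${basePrompt}\n\n## Additional instructions\n\n${system}` : basePrompt;
}
```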

View file

@@ -108,7 +108,7 @@ describe('validateToolCalls', () => {
});
}
expect(() => validate()).toThrowErrorMatchingInlineSnapshot(
`"Tool call arguments for my_function were invalid"`
`"Tool call arguments for my_function (1) were invalid"`
);
try {
@@ -119,6 +119,15 @@ describe('validateToolCalls', () => {
arguments: JSON.stringify({ foo: 'bar' }),
errorsText: `data must have required property 'bar'`,
name: 'my_function',
toolCalls: [
{
function: {
arguments: JSON.stringify({ foo: 'bar' }),
name: 'my_function',
},
toolCallId: '1',
},
],
});
} else {
fail('Expected toolValidationError');

View file

@@ -54,11 +54,12 @@ export function validateToolCalls<TToolOptions extends ToolOptions>({
if (!valid) {
throw createToolValidationError(
`Tool call arguments for ${toolCall.function.name} were invalid`,
`Tool call arguments for ${toolCall.function.name} (${toolCall.toolCallId}) were invalid`,
{
name: toolCall.function.name,
errorsText: validator.errorsText(),
arguments: toolCall.function.arguments,
toolCalls,
}
);
}

View file

@@ -51,6 +51,9 @@
"@kbn/rule-data-utils",
"@kbn/spaces-plugin",
"@kbn/cloud-plugin",
"@kbn/observability-utils-browser",
"@kbn/observability-utils-server",
"@kbn/observability-utils-common",
"@kbn/storybook",
"@kbn/dashboard-plugin",
"@kbn/deeplinks-analytics",

View file

@@ -17,6 +17,7 @@ import { SearchBar, IUnifiedSearchPluginServices } from '@kbn/unified-search-plu
import { KibanaContextProvider } from '@kbn/kibana-react-plugin/public';
import { merge } from 'lodash';
import { Storage } from '@kbn/kibana-utils-plugin/public';
import { of } from 'rxjs';
import type { EsqlQueryMeta } from '../public/services/esql';
import type { InvestigateAppServices } from '../public/services/types';
import { InvestigateAppKibanaContext } from '../public/hooks/use_kibana';
@@ -54,6 +55,10 @@ export function getMockInvestigateAppContext(): DeeplyMockedKeys<InvestigateAppK
}),
},
charts: {} as any,
investigateAppRepositoryClient: {
fetch: jest.fn().mockImplementation(() => Promise.resolve()),
stream: jest.fn().mockImplementation(() => of()) as any,
},
};
const core = coreMock.createStart();

View file

@@ -2,8 +2,8 @@
"type": "plugin",
"id": "@kbn/investigate-app-plugin",
"owner": "@elastic/obs-ux-management-team",
"group": "observability",
"visibility": "private",
"group": "observability",
"plugin": {
"id": "investigateApp",
"server": true,
@@ -24,14 +24,22 @@
"observability",
"licensing",
"ruleRegistry",
"inference",
"alerting",
"spaces",
"slo",
"apmDataAccess",
"usageCollection"
],
"optionalPlugins": [
"observabilityAIAssistant",
"observabilityAIAssistantApp"
],
"requiredBundles": [
"esql",
"kibanaReact",
"kibanaUtils"
],
"optionalPlugins": ["observabilityAIAssistant"],
"extraPublicDirs": []
}
}

View file

@@ -11,7 +11,7 @@ import type {
ReturnOf,
RouteRepositoryClient,
} from '@kbn/server-route-repository';
import { formatRequest } from '@kbn/server-route-repository-utils/src/format_request';
import { createRepositoryClient } from '@kbn/server-route-repository-client';
import type { InvestigateAppServerRouteRepository } from '../../server';
type FetchOptions = Omit<HttpFetchOptions, 'body'> & {
@@ -25,15 +25,15 @@ export type InvestigateAppAPIClientOptions = Omit<
signal: AbortSignal | null;
};
export type InvestigateAppAPIClient = RouteRepositoryClient<
export type InvestigateAppRepositoryClient = RouteRepositoryClient<
InvestigateAppServerRouteRepository,
InvestigateAppAPIClientOptions
>['fetch'];
>;
export type AutoAbortedInvestigateAppAPIClient = RouteRepositoryClient<
export type AutoAbortedInvestigateAppRepositoryClient = RouteRepositoryClient<
InvestigateAppServerRouteRepository,
Omit<InvestigateAppAPIClientOptions, 'signal'>
>['fetch'];
>;
export type InvestigateAppAPIEndpoint = keyof InvestigateAppServerRouteRepository;
@@ -45,19 +45,6 @@ export type APIReturnType<TEndpoint extends InvestigateAppAPIEndpoint> = ReturnO
export type InvestigateAppAPIClientRequestParamsOf<TEndpoint extends InvestigateAppAPIEndpoint> =
ClientRequestParamsOf<InvestigateAppServerRouteRepository, TEndpoint>;
export function createCallInvestigateAppAPI(core: CoreStart | CoreSetup) {
return ((endpoint, options) => {
const { params } = options as unknown as {
params?: Partial<Record<string, any>>;
};
const { method, pathname, version } = formatRequest(endpoint, params?.path);
return core.http[method](pathname, {
...options,
body: params && params.body ? JSON.stringify(params.body) : undefined,
query: params?.query,
version,
});
}) as InvestigateAppAPIClient;
export function createInvestigateAppRepositoryClient(core: CoreStart | CoreSetup) {
return createRepositoryClient(core) as InvestigateAppRepositoryClient;
}
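With the hand-rolled fetch wrapper gone, the typed client from `@kbn/server-route-repository-client` exposes both a promise-based `fetch` and an observable-returning `stream`, which is what the root cause analysis UI below relies on. A hedged usage sketch — the endpoint string matches the route added in this PR, but the body values are purely illustrative:

```ts
const client = createInvestigateAppRepositoryClient(coreStart);
const controller = new AbortController();

// `stream` returns an observable of server-sent events, typed against the
// server route repository, so endpoint string and params are checked.
client
  .stream('POST /internal/observability/investigation/root_cause_analysis', {
    params: {
      body: {
        investigationId: 'my-investigation', // illustrative values
        connectorId: 'my-connector',
        context: '...',
        rangeFrom: 'now-15m',
        rangeTo: 'now',
        serviceName: 'my-service',
      },
    },
    signal: controller.signal,
  })
  .subscribe((event) => {
    // Each emission wraps a RootCauseAnalysisEvent.
  });
```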

View file

@@ -4,19 +4,22 @@
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import dedent from 'dedent';
import {
ALERT_RULE_PARAMETERS,
ALERT_START,
ALERT_RULE_CATEGORY,
ALERT_REASON,
} from '@kbn/rule-data-utils';
import { i18n } from '@kbn/i18n';
import { EntityWithSource } from '@kbn/investigation-shared';
import React, { useCallback } from 'react';
import type { RootCauseAnalysisEvent } from '@kbn/observability-ai-server/root_cause_analysis';
import { EcsFieldsResponse } from '@kbn/rule-registry-plugin/common';
import React, { useState, useRef, useEffect } from 'react';
import { omit } from 'lodash';
import {
ALERT_FLAPPING_HISTORY,
ALERT_RULE_EXECUTION_TIMESTAMP,
ALERT_RULE_EXECUTION_UUID,
EVENT_ACTION,
EVENT_KIND,
} from '@kbn/rule-registry-plugin/common/technical_rule_data_field_names';
import { isRequestAbortedError } from '@kbn/server-route-repository-client';
import { useKibana } from '../../../../hooks/use_kibana';
import { useInvestigation } from '../../contexts/investigation_context';
import { useFetchEntities } from '../../../../hooks/use_fetch_entities';
import { useUpdateInvestigation } from '../../../../hooks/use_update_investigation';
export interface InvestigationContextualInsight {
key: string;
@@ -25,98 +28,177 @@ }
}
export function AssistantHypothesis({ investigationId }: { investigationId: string }) {
const { alert } = useInvestigation();
const {
alert,
globalParams: { timeRange },
investigation,
} = useInvestigation();
const {
core: { notifications },
services: { investigateAppRepositoryClient },
dependencies: {
start: {
observabilityAIAssistant: {
ObservabilityAIAssistantContextualInsight,
getContextualInsightMessages,
},
observabilityAIAssistant: { useGenAIConnectors },
observabilityAIAssistantApp: { RootCauseAnalysisContainer },
},
},
} = useKibana();
const { data: entitiesData } = useFetchEntities({
investigationId,
serviceName: alert?.['service.name'] ? `${alert?.['service.name']}` : undefined,
serviceEnvironment: alert?.['service.environment']
? `${alert?.['service.environment']}`
: undefined,
hostName: alert?.['host.name'] ? `${alert?.['host.name']}` : undefined,
containerId: alert?.['container.id'] ? `${alert?.['container.id']}` : undefined,
});
const getAlertContextMessages = useCallback(async () => {
if (!getContextualInsightMessages || !alert) {
return [];
const { mutateAsync: updateInvestigation } = useUpdateInvestigation();
const { loading: loadingConnector, selectedConnector } = useGenAIConnectors();
const serviceName = alert?.['service.name'] as string | undefined;
const [events, setEvents] = useState<RootCauseAnalysisEvent[]>([]);
const [loading, setLoading] = useState(false);
const [error, setError] = useState<Error | undefined>(undefined);
const controllerRef = useRef(new AbortController());
useEffect(() => {
if (investigation?.rootCauseAnalysis) {
setEvents(investigation.rootCauseAnalysis.events);
}
}, [investigation?.rootCauseAnalysis]);
const entities = entitiesData?.entities ?? [];
const [completeInBackground, setCompleteInBackground] = useState(true);
const entityContext = entities?.length
? `
Alerts can optionally be associated with entities. Entities can be services, hosts, containers, or other resources. Entities can have metrics associated with them.
The alert that triggered this investigation is associated with the following entities: ${entities
.map((entity, index) => {
return dedent(`
## Entity ${index + 1}:
${formatEntityMetrics(entity)};
`);
})
.join('/n/n')}`
: '';
const runRootCauseAnalysis = ({
alert: nonNullishAlert,
connectorId,
serviceName: nonNullishServiceName,
}: {
alert: EcsFieldsResponse;
connectorId: string;
serviceName: string;
}) => {
const rangeFrom = timeRange.from;
return getContextualInsightMessages({
message: `I am investigating a failure in my system. I was made aware of the failure by an alert and I am trying to understand the root cause of the issue.`,
instructions: dedent(
`I'm an SRE. I am investigating a failure in my system. I was made aware of the failure via an alert. Your current task is to help me identify the root cause of the failure in my system.
const rangeTo = timeRange.to;
The rule that triggered the alert is a ${
alert[ALERT_RULE_CATEGORY]
} rule. The alert started at ${alert[ALERT_START]}. The alert reason is ${
alert[ALERT_REASON]
}. The rule parameters are ${JSON.stringify(ALERT_RULE_PARAMETERS)}.
setLoading(true);
${entityContext}
setError(undefined);
Based on the alert details, suggest a root cause and next steps to mitigate the issue.
I do not have the alert details or entity details in front of me, so be sure to repeat the alert reason (${
alert[ALERT_REASON]
}), when the alert was triggered (${
alert[ALERT_START]
}), and the entity metrics in your response.
setEvents([]);
When displaying the entity metrics, please convert the metrics to a human-readable format. For example, convert "logRate" to "Log Rate" and "errorRate" to "Error Rate".
`
),
});
}, [alert, getContextualInsightMessages, entitiesData?.entities]);
investigateAppRepositoryClient
.stream('POST /internal/observability/investigation/root_cause_analysis', {
params: {
body: {
investigationId,
connectorId,
context: `The user is investigating an alert for the ${serviceName} service,
and wants to find the root cause. Here is the alert:
if (!ObservabilityAIAssistantContextualInsight) {
${JSON.stringify(sanitizeAlert(nonNullishAlert))}`,
rangeFrom,
rangeTo,
serviceName: nonNullishServiceName,
completeInBackground,
},
},
signal: controllerRef.current.signal,
})
.subscribe({
next: (event) => {
setEvents((prev) => {
return prev.concat(event.event);
});
},
error: (nextError) => {
if (!isRequestAbortedError(nextError)) {
notifications.toasts.addError(nextError, {
title: i18n.translate(
'xpack.investigateApp.assistantHypothesis.failedToLoadAnalysis',
{
defaultMessage: `Failed to load analysis`,
}
),
});
setError(nextError);
} else {
setError(
new Error(
i18n.translate('xpack.investigateApp.assistantHypothesis.analysisAborted', {
defaultMessage: `Analysis was aborted`,
})
)
);
}
setLoading(false);
},
complete: () => {
setLoading(false);
},
});
};
if (!serviceName) {
return null;
}
return alert && entitiesData ? (
<ObservabilityAIAssistantContextualInsight
title={i18n.translate(
'xpack.investigateApp.assistantHypothesis.observabilityAIAssistantContextualInsight.helpMeInvestigateThisLabel',
{ defaultMessage: 'Help me investigate this failure' }
)}
messages={getAlertContextMessages}
return (
<RootCauseAnalysisContainer
events={events}
loading={loading || loadingConnector}
completeInBackground={completeInBackground}
onCompleteInBackgroundClick={() => {
setCompleteInBackground(() => !completeInBackground);
}}
onStopAnalysisClick={() => {
controllerRef.current.abort();
controllerRef.current = new AbortController();
}}
onClearAnalysisClick={() => {
setEvents([]);
if (investigation?.rootCauseAnalysis) {
updateInvestigation({
investigationId,
payload: {
rootCauseAnalysis: {
events: [],
},
},
});
}
}}
onResetAnalysisClick={() => {
controllerRef.current.abort();
controllerRef.current = new AbortController();
if (alert && selectedConnector && serviceName) {
runRootCauseAnalysis({
alert,
connectorId: selectedConnector,
serviceName,
});
}
}}
error={error}
onStartAnalysisClick={() => {
if (alert && selectedConnector && serviceName) {
runRootCauseAnalysis({
alert,
connectorId: selectedConnector,
serviceName,
});
}
}}
/>
) : null;
);
}
function sanitizeAlert(alert: EcsFieldsResponse) {
return omit(
alert,
ALERT_RULE_EXECUTION_TIMESTAMP,
'_index',
ALERT_FLAPPING_HISTORY,
EVENT_ACTION,
EVENT_KIND,
ALERT_RULE_EXECUTION_UUID,
'@timestamp'
);
}
const formatEntityMetrics = (entity: EntityWithSource): string => {
const entityMetrics = Object.entries(entity.metrics)
.map(([key, value]) => `${key}: ${value}`)
.join(', ');
const entitySources = entity.sources.map((source) => source.dataStream).join(', ');
return dedent(`
Entity name: ${entity.display_name};
Entity type: ${entity.type};
Entity metrics: ${entityMetrics};
Entity data streams: ${entitySources}
`);
};
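One small pattern in the component above is worth calling out: stopping or resetting the analysis calls `abort()` and then immediately replaces the controller, so the next run does not reuse an already-aborted signal. Reduced to its essence, outside React:

```ts
// Sketch: a restartable abort handle. abort() poisons a controller forever,
// so a fresh one must be swapped in before the next request starts.
const controllerRef = { current: new AbortController() };

function stopAndReset(): void {
  controllerRef.current.abort();
  controllerRef.current = new AbortController();
}

function currentSignal(): AbortSignal {
  return controllerRef.current.signal;
}
```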

View file

@@ -27,6 +27,7 @@ import type {
InvestigateAppSetupDependencies,
InvestigateAppStartDependencies,
} from './types';
import { createInvestigateAppRepositoryClient, InvestigateAppRepositoryClient } from './api';
const getCreateEsqlService = once(() => import('./services/esql').then((m) => m.createEsqlService));
@@ -41,6 +42,7 @@ export class InvestigateAppPlugin
{
logger: Logger;
config: ConfigSchema;
repositoryClient!: InvestigateAppRepositoryClient;
constructor(context: PluginInitializerContext<ConfigSchema>) {
this.logger = context.logger.get();
@@ -51,6 +53,8 @@ export class InvestigateAppPlugin
coreSetup: CoreSetup<InvestigateAppStartDependencies, InvestigateAppPublicStart>,
pluginsSetup: InvestigateAppSetupDependencies
): InvestigateAppPublicSetup {
this.repositoryClient = createInvestigateAppRepositoryClient(coreSetup);
coreSetup.application.register({
id: INVESTIGATE_APP_ID,
title: i18n.translate('xpack.investigateApp.appTitle', {
@@ -93,6 +97,7 @@ export class InvestigateAppPlugin
lens: pluginsStart.lens,
}),
charts: pluginsStart.charts,
investigateAppRepositoryClient: this.repositoryClient,
};
ReactDOM.render(
@@ -127,6 +132,7 @@ export class InvestigateAppPlugin
start: pluginsStart,
},
services: {
investigateAppRepositoryClient: this.repositoryClient,
esql: createEsqlService({
data: pluginsStart.data,
dataViews: pluginsStart.dataViews,

View file

@@ -7,8 +7,10 @@
import { ChartsPluginStart } from '@kbn/charts-plugin/public';
import type { EsqlService } from './esql';
import type { InvestigateAppRepositoryClient } from '../api';
export interface InvestigateAppServices {
esql: EsqlService;
charts: ChartsPluginStart;
investigateAppRepositoryClient: InvestigateAppRepositoryClient;
}

View file

@@ -8,6 +8,10 @@ import type {
ObservabilityAIAssistantPublicSetup,
ObservabilityAIAssistantPublicStart,
} from '@kbn/observability-ai-assistant-plugin/public';
import type {
ObservabilityAIAssistantAppPublicSetup,
ObservabilityAIAssistantAppPublicStart,
} from '@kbn/observability-ai-assistant-app-plugin/public';
import { ChartsPluginStart } from '@kbn/charts-plugin/public';
import type { ContentManagementPublicStart } from '@kbn/content-management-plugin/public';
import type { DataPublicPluginSetup, DataPublicPluginStart } from '@kbn/data-plugin/public';
@@ -43,6 +47,7 @@ export interface InvestigateAppSetupDependencies {
investigate: InvestigatePublicSetup;
observabilityShared: ObservabilitySharedPluginSetup;
observabilityAIAssistant: ObservabilityAIAssistantPublicSetup;
observabilityAIAssistantApp: ObservabilityAIAssistantAppPublicSetup;
lens: LensPublicSetup;
dataViews: DataViewsPublicPluginSetup;
data: DataPublicPluginSetup;
@@ -58,6 +63,7 @@ export interface InvestigateAppStartDependencies {
investigate: InvestigatePublicStart;
observabilityShared: ObservabilitySharedPluginStart;
observabilityAIAssistant: ObservabilityAIAssistantPublicStart;
observabilityAIAssistantApp: ObservabilityAIAssistantAppPublicStart;
lens: LensPublicStart;
dataViews: DataViewsPublicPluginStart;
data: DataPublicPluginStart;

View file

@@ -15,18 +15,19 @@ import {
findInvestigationsParamsSchema,
getAllInvestigationStatsParamsSchema,
getAllInvestigationTagsParamsSchema,
getEntitiesParamsSchema,
GetEntitiesResponse,
getEventsParamsSchema,
GetEventsResponse,
getInvestigationItemsParamsSchema,
getInvestigationNotesParamsSchema,
getInvestigationParamsSchema,
updateInvestigationItemParamsSchema,
updateInvestigationNoteParamsSchema,
updateInvestigationParamsSchema,
getEventsParamsSchema,
GetEventsResponse,
getEntitiesParamsSchema,
GetEntitiesResponse,
} from '@kbn/investigation-shared';
import { ScopedAnnotationsClient } from '@kbn/observability-plugin/server';
import { createEntitiesESClient } from '../clients/create_entities_es_client';
import { createInvestigation } from '../services/create_investigation';
import { createInvestigationItem } from '../services/create_investigation_item';
import { createInvestigationNote } from '../services/create_investigation_note';
@@ -34,20 +35,20 @@ import { deleteInvestigation } from '../services/delete_investigation';
import { deleteInvestigationItem } from '../services/delete_investigation_item';
import { deleteInvestigationNote } from '../services/delete_investigation_note';
import { findInvestigations } from '../services/find_investigations';
import { AlertsClient, getAlertsClient } from '../services/get_alerts_client';
import { getAllInvestigationStats } from '../services/get_all_investigation_stats';
import { getAllInvestigationTags } from '../services/get_all_investigation_tags';
import { getEntitiesWithSource } from '../services/get_entities';
import { getAlertEvents, getAnnotationEvents } from '../services/get_events';
import { getInvestigation } from '../services/get_investigation';
import { getInvestigationItems } from '../services/get_investigation_items';
import { getInvestigationNotes } from '../services/get_investigation_notes';
import { investigationRepositoryFactory } from '../services/investigation_repository';
import { updateInvestigation } from '../services/update_investigation';
import { getAlertEvents, getAnnotationEvents } from '../services/get_events';
import { AlertsClient, getAlertsClient } from '../services/get_alerts_client';
import { updateInvestigationItem } from '../services/update_investigation_item';
import { updateInvestigationNote } from '../services/update_investigation_note';
import { createInvestigateAppServerRoute } from './create_investigate_app_server_route';
import { getAllInvestigationStats } from '../services/get_all_investigation_stats';
import { getEntitiesWithSource } from '../services/get_entities';
import { createEntitiesESClient } from '../clients/create_entities_es_client';
import { rootCauseAnalysisRoute } from './rca/route';
const createInvestigationRoute = createInvestigateAppServerRoute({
endpoint: 'POST /api/observability/investigations 2023-10-31',
@@ -400,6 +401,7 @@ export function getGlobalInvestigateAppServerRouteRepository() {
...getEntitiesRoute,
...getAllInvestigationStatsRoute,
...getAllInvestigationTagsRoute,
...rootCauseAnalysisRoute,
};
}

View file

@@ -0,0 +1,163 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { Observable, catchError, from, of, share, switchMap, toArray } from 'rxjs';
import { ServerSentEventBase } from '@kbn/sse-utils';
import {
RootCauseAnalysisEvent,
runRootCauseAnalysis,
} from '@kbn/observability-ai-server/root_cause_analysis';
import { z } from '@kbn/zod';
import datemath from '@elastic/datemath';
import { OBSERVABILITY_LOGS_DATA_ACCESS_LOG_SOURCES_ID } from '@kbn/management-settings-ids';
import { createObservabilityEsClient } from '@kbn/observability-utils-server/es/client/create_observability_es_client';
import { preconditionFailed } from '@hapi/boom';
import { createInvestigateAppServerRoute } from '../create_investigate_app_server_route';
import { investigationRepositoryFactory } from '../../services/investigation_repository';
export const rootCauseAnalysisRoute = createInvestigateAppServerRoute({
endpoint: 'POST /internal/observability/investigation/root_cause_analysis',
options: {
tags: [],
},
params: z.object({
body: z.object({
investigationId: z.string(),
rangeFrom: z.string(),
rangeTo: z.string(),
serviceName: z.string(),
context: z.string(),
connectorId: z.string(),
completeInBackground: z.boolean().optional(),
}),
}),
handler: async ({
params,
plugins,
request,
context: requestContext,
logger,
}): Promise<Observable<ServerSentEventBase<'event', { event: RootCauseAnalysisEvent }>>> => {
const {
body: {
investigationId,
context,
rangeFrom,
rangeTo,
serviceName,
connectorId,
completeInBackground,
},
} = params;
if (!plugins.observabilityAIAssistant) {
throw preconditionFailed('Observability AI Assistant plugin is not available');
}
const start = datemath.parse(rangeFrom)?.valueOf()!;
const end = datemath.parse(rangeTo)?.valueOf()!;
const coreContext = await requestContext.core;
const coreEsClient = coreContext.elasticsearch.client.asCurrentUser;
const soClient = coreContext.savedObjects.client;
const uiSettingsClient = coreContext.uiSettings.client;
const repository = investigationRepositoryFactory({ soClient, logger });
const esClient = createObservabilityEsClient({
client: coreEsClient,
logger,
plugin: 'investigateApp',
});
const [
investigation,
rulesClient,
alertsClient,
inferenceClient,
observabilityAIAssistantClient,
spaceId = 'default',
apmIndices,
logSources,
sloSummaryIndices,
] = await Promise.all([
repository.findById(investigationId),
(await plugins.alerting.start()).getRulesClientWithRequest(request),
(await plugins.ruleRegistry.start()).getRacClientWithRequest(request),
(await plugins.inference.start()).getClient({ request }),
plugins
.observabilityAIAssistant!.start()
.then((observabilityAIAssistantStart) =>
observabilityAIAssistantStart.service.getClient({ request, scopes: ['observability'] })
),
(await plugins.spaces?.start())?.spacesService.getSpaceId(request),
plugins.apmDataAccess.setup.getApmIndices(soClient),
uiSettingsClient.get(OBSERVABILITY_LOGS_DATA_ACCESS_LOG_SOURCES_ID) as Promise<string[]>,
(await plugins.slo.start()).getSloClientWithRequest(request).getSummaryIndices(),
]);
const next$ = runRootCauseAnalysis({
alertsClient,
connectorId,
start,
end,
esClient,
inferenceClient,
indices: {
logs: logSources,
traces: [apmIndices.span, apmIndices.error, apmIndices.transaction],
sloSummaries: sloSummaryIndices,
},
rulesClient,
observabilityAIAssistantClient,
serviceName,
spaceId,
context,
logger,
}).pipe(
switchMap((event) => {
return of({
type: 'event' as const,
event,
});
})
);
if (completeInBackground) {
const shared$ = next$.pipe(share());
shared$
.pipe(
toArray(),
catchError(() => {
return of();
}),
switchMap((events) => {
return from(
repository.save({
...investigation,
rootCauseAnalysis: {
events: events.map(({ event }) => event),
},
})
);
})
)
.subscribe({
error: (error) => {
logger.error(`Failed to update investigation: ${error.message}`);
logger.error(error);
},
});
return shared$;
}
return next$;
},
});
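The `completeInBackground` branch is the interesting part of this route: `share()` multiplexes a single analysis run to two consumers, the SSE response and a background subscriber that buffers every event with `toArray()` and persists the full list onto the investigation once the stream completes. The shape of the pattern, with a stand-in `persist` callback in place of the repository save:

```ts
import { Observable, catchError, from, of, share, switchMap, toArray } from 'rxjs';

// Fan one event stream out to (1) the live response and (2) a background
// collector that saves the complete event list when the run finishes.
function withBackgroundPersistence<T>(
  events$: Observable<T>,
  persist: (events: T[]) => Promise<unknown>,
  onError: (error: Error) => void
): Observable<T> {
  const shared$ = events$.pipe(share());

  shared$
    .pipe(
      toArray(),
      // On error, emit nothing: persistence is skipped, while the live
      // subscriber still sees the error through its own subscription.
      catchError(() => of()),
      switchMap((events) => from(persist(events)))
    )
    .subscribe({ error: onError });

  return shared$;
}
```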

View file

@@ -5,11 +5,23 @@
* 2.0.
*/
import { ObservabilityPluginSetup } from '@kbn/observability-plugin/server';
import {
import type { ObservabilityPluginSetup } from '@kbn/observability-plugin/server';
import type {
RuleRegistryPluginSetupContract,
RuleRegistryPluginStartContract,
} from '@kbn/rule-registry-plugin/server';
import type { AlertingServerSetup, AlertingServerStart } from '@kbn/alerting-plugin/server/plugin';
import type { SLOServerStart, SLOServerSetup } from '@kbn/slo-plugin/server';
import type { InferenceServerStart, InferenceServerSetup } from '@kbn/inference-plugin/server';
import type { SpacesPluginSetup, SpacesPluginStart } from '@kbn/spaces-plugin/server';
import type {
ApmDataAccessPluginStart,
ApmDataAccessPluginSetup,
} from '@kbn/apm-data-access-plugin/server';
import type {
ObservabilityAIAssistantServerStart,
ObservabilityAIAssistantServerSetup,
} from '@kbn/observability-ai-assistant-plugin/server';
import { UsageCollectionSetup } from '@kbn/usage-collection-plugin/server';
/* eslint-disable @typescript-eslint/no-empty-interface*/
@@ -19,11 +31,23 @@ export interface ConfigSchema {}
export interface InvestigateAppSetupDependencies {
observability: ObservabilityPluginSetup;
ruleRegistry: RuleRegistryPluginSetupContract;
slo: SLOServerSetup;
alerting: AlertingServerSetup;
inference: InferenceServerSetup;
spaces?: SpacesPluginSetup;
apmDataAccess: ApmDataAccessPluginSetup;
observabilityAIAssistant?: ObservabilityAIAssistantServerSetup;
usageCollection: UsageCollectionSetup;
}
export interface InvestigateAppStartDependencies {
ruleRegistry: RuleRegistryPluginStartContract;
slo: SLOServerStart;
alerting: AlertingServerStart;
inference: InferenceServerStart;
spaces?: SpacesPluginStart;
apmDataAccess: ApmDataAccessPluginStart;
observabilityAIAssistant?: ObservabilityAIAssistantServerStart;
}
export interface InvestigateAppServerSetup {}

View file

@@ -17,57 +17,67 @@
".storybook/**/*.js"
],
"kbn_references": [
"@kbn/esql",
"@kbn/core",
"@kbn/data-views-plugin",
"@kbn/expressions-plugin",
"@kbn/kibana-utils-plugin",
"@kbn/utility-types-jest",
"@kbn/es-types",
"@kbn/data-plugin",
"@kbn/embeddable-plugin",
"@kbn/unified-search-plugin",
"@kbn/kibana-react-plugin",
"@kbn/server-route-repository",
"@kbn/server-route-repository-client",
"@kbn/react-kibana-context-theme",
"@kbn/shared-ux-link-redirect-app",
"@kbn/kibana-react-plugin",
"@kbn/i18n",
"@kbn/embeddable-plugin",
"@kbn/observability-ai-assistant-plugin",
"@kbn/lens-plugin",
"@kbn/esql",
"@kbn/esql-utils",
"@kbn/data-plugin",
"@kbn/es-types",
"@kbn/field-types",
"@kbn/expressions-plugin",
"@kbn/deeplinks-observability",
"@kbn/logging",
"@kbn/data-views-plugin",
"@kbn/observability-shared-plugin",
"@kbn/config-schema",
"@kbn/investigate-plugin",
"@kbn/dataset-quality-plugin",
"@kbn/utility-types-jest",
"@kbn/content-management-plugin",
"@kbn/kibana-utils-plugin",
"@kbn/visualization-utils",
"@kbn/unified-search-plugin",
"@kbn/es-query",
"@kbn/server-route-repository",
"@kbn/security-plugin",
"@kbn/ui-actions-plugin",
"@kbn/server-route-repository-utils",
"@kbn/core-saved-objects-server",
"@kbn/rule-registry-plugin",
"@kbn/shared-ux-router",
"@kbn/i18n",
"@kbn/investigation-shared",
"@kbn/core-security-common",
"@kbn/saved-objects-finder-plugin",
"@kbn/presentation-containers",
"@kbn/lens-plugin",
"@kbn/rule-registry-plugin",
"@kbn/security-plugin",
"@kbn/rule-data-utils",
"@kbn/investigate-plugin",
"@kbn/observability-utils-browser",
"@kbn/lens-embeddable-utils",
"@kbn/i18n-react",
"@kbn/zod",
"@kbn/observability-plugin",
"@kbn/licensing-plugin",
"@kbn/rule-data-utils",
"@kbn/es-query",
"@kbn/saved-objects-finder-plugin",
"@kbn/presentation-containers",
"@kbn/observability-ai-server",
"@kbn/charts-plugin",
"@kbn/observability-shared-plugin",
"@kbn/core-security-common",
"@kbn/deeplinks-observability",
"@kbn/logging",
"@kbn/esql-utils",
"@kbn/observability-ai-assistant-plugin",
"@kbn/observability-ai-assistant-app-plugin",
"@kbn/content-management-plugin",
"@kbn/dataset-quality-plugin",
"@kbn/ui-actions-plugin",
"@kbn/field-types",
"@kbn/entities-schema",
"@kbn/core-elasticsearch-server",
"@kbn/observability-plugin",
"@kbn/config-schema",
"@kbn/visualization-utils",
"@kbn/usage-collection-plugin",
"@kbn/calculate-auto",
"@kbn/ml-random-sampler-utils",
"@kbn/charts-plugin",
"@kbn/observability-utils-browser",
"@kbn/usage-collection-plugin",
"@kbn/zod",
"@kbn/inference-common",
"@kbn/core-elasticsearch-server",
"@kbn/sse-utils",
"@kbn/management-settings-ids",
"@kbn/observability-utils-server",
"@kbn/licensing-plugin",
"@kbn/core-saved-objects-server",
"@kbn/alerting-plugin",
"@kbn/slo-plugin",
"@kbn/inference-plugin",
"@kbn/spaces-plugin",
"@kbn/apm-data-access-plugin",
],
}

View file

@@ -52,6 +52,7 @@ export function convertMessagesForInference(messages: Message[]): InferenceMessa
}
inferenceMessages.push({
name: message.message.name!,
role: InferenceMessageRole.Tool,
response: JSON.parse(message.message.content ?? '{}'),
toolCallId: toolCallRequest.toolCalls![0].toolCallId,

Some files were not shown because too many files have changed in this diff.