[8.x] [Inference] Inference CLI client (#214691) (#216202)

# Backport

This will backport the following commits from `main` to `8.x`:
- [[Inference] Inference CLI client
(#214691)](https://github.com/elastic/kibana/pull/214691)

<!--- Backport version: 9.6.6 -->

### Questions ?
Please refer to the [Backport tool
documentation](https://github.com/sorenlouv/backport)

<!--BACKPORT [{"author":{"name":"Dario
Gieselaar","email":"dario.gieselaar@elastic.co"},"sourceCommit":{"committedDate":"2025-03-18T12:33:30Z","message":"[Inference]
Inference CLI client (#214691)\n\nExposes an Inference (plugin) API
client for scripts, that mimicks the\n`chatComplete` and `output` APIs
that are available on its start\ncontract. It depends on the
KibanaClient that is exposed from the\n`@kbn/kibana-api-cli` package. It
automatically selects a connector
if\navailable.\n\n---------\n\nCo-authored-by: kibanamachine
<42973632+kibanamachine@users.noreply.github.com>","sha":"70f1880e4fae58c6c969b5b19c9d31abf3af4c45","branchLabelMapping":{"^v9.1.0$":"main","^v8.19.0$":"8.x","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["release_note:skip","backport
missing","Team:Obs AI
Assistant","backport:version","v9.1.0","v8.19.0"],"title":"[Inference]
Inference CLI
client","number":214691,"url":"https://github.com/elastic/kibana/pull/214691","mergeCommit":{"message":"[Inference]
Inference CLI client (#214691)\n\nExposes an Inference (plugin) API
client for scripts, that mimicks the\n`chatComplete` and `output` APIs
that are available on its start\ncontract. It depends on the
KibanaClient that is exposed from the\n`@kbn/kibana-api-cli` package. It
automatically selects a connector
if\navailable.\n\n---------\n\nCo-authored-by: kibanamachine
<42973632+kibanamachine@users.noreply.github.com>","sha":"70f1880e4fae58c6c969b5b19c9d31abf3af4c45"}},"sourceBranch":"main","suggestedTargetBranches":["8.x"],"targetPullRequestStates":[{"branch":"main","label":"v9.1.0","branchLabelMappingKey":"^v9.1.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/214691","number":214691,"mergeCommit":{"message":"[Inference]
Inference CLI client (#214691)\n\nExposes an Inference (plugin) API
client for scripts, that mimicks the\n`chatComplete` and `output` APIs
that are available on its start\ncontract. It depends on the
KibanaClient that is exposed from the\n`@kbn/kibana-api-cli` package. It
automatically selects a connector
if\navailable.\n\n---------\n\nCo-authored-by: kibanamachine
<42973632+kibanamachine@users.noreply.github.com>","sha":"70f1880e4fae58c6c969b5b19c9d31abf3af4c45"}},{"branch":"8.x","label":"v8.19.0","branchLabelMappingKey":"^v8.19.0$","isSourceBranch":false,"state":"NOT_CREATED"}]}]
BACKPORT-->

---------

Co-authored-by: kibanamachine <42973632+kibanamachine@users.noreply.github.com>
This commit is contained in:
Dario Gieselaar 2025-03-27 19:25:57 +01:00 committed by GitHub
parent 02a0ac0d61
commit d48afc828c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
48 changed files with 1498 additions and 106 deletions

3
.github/CODEOWNERS vendored
View file

@ -543,6 +543,7 @@ x-pack/platform/plugins/private/index_lifecycle_management @elastic/kibana-manag
x-pack/platform/plugins/shared/index_management @elastic/kibana-management
x-pack/platform/packages/shared/index-management/index_management_shared_types @elastic/kibana-management
src/platform/test/plugin_functional/plugins/index_patterns @elastic/kibana-data-discovery
x-pack/platform/packages/shared/kbn-inference-cli @elastic/appex-ai-infra
x-pack/platform/packages/shared/ai-infra/inference-common @elastic/appex-ai-infra
x-pack/platform/plugins/shared/inference_endpoint @elastic/ml-ui
x-pack/platform/packages/shared/kbn-inference-endpoint-ui-common @elastic/response-ops @elastic/appex-ai-infra @elastic/obs-ai-assistant @elastic/security-generative-ai
@ -568,12 +569,14 @@ src/platform/packages/private/kbn-jest-serializers @elastic/kibana-operations
src/platform/packages/private/kbn-journeys @elastic/kibana-operations @elastic/appex-qa
packages/kbn-json-ast @elastic/kibana-operations
x-pack/platform/packages/private/ml/json_schemas @elastic/ml-ui
x-pack/solutions/observability/packages/kbn-genai-cli @elastic/obs-knowledge-team
src/platform/test/health_gateway/plugins/status @elastic/kibana-core
src/platform/test/plugin_functional/plugins/kbn_sample_panel_action @elastic/appex-sharedux
src/platform/test/plugin_functional/plugins/kbn_top_nav @elastic/kibana-core
src/platform/test/plugin_functional/plugins/kbn_tp_custom_visualizations @elastic/kibana-visualizations
src/platform/test/interpreter_functional/plugins/kbn_tp_run_pipeline @elastic/kibana-core
x-pack/platform/packages/shared/kbn-key-value-metadata-table @elastic/obs-ux-infra_services-team @elastic/obs-ux-logs-team
x-pack/platform/packages/shared/kbn-kibana-api-cli @elastic/appex-ai-infra
x-pack/test/functional_cors/plugins/kibana_cors_test @elastic/kibana-security
packages/kbn-kibana-manifest-schema @elastic/kibana-operations
src/platform/plugins/private/kibana_overview @elastic/appex-sharedux

View file

@ -615,6 +615,7 @@
"@kbn/kbn-tp-custom-visualizations-plugin": "link:src/platform/test/plugin_functional/plugins/kbn_tp_custom_visualizations",
"@kbn/kbn-tp-run-pipeline-plugin": "link:src/platform/test/interpreter_functional/plugins/kbn_tp_run_pipeline",
"@kbn/key-value-metadata-table": "link:x-pack/platform/packages/shared/kbn-key-value-metadata-table",
"@kbn/kibana-api-cli": "link:x-pack/platform/packages/shared/kbn-kibana-api-cli",
"@kbn/kibana-cors-test-plugin": "link:x-pack/test/functional_cors/plugins/kibana_cors_test",
"@kbn/kibana-overview-plugin": "link:src/platform/plugins/private/kibana_overview",
"@kbn/kibana-react-plugin": "link:src/platform/plugins/shared/kibana_react",
@ -1471,10 +1472,12 @@
"@kbn/get-repo-files": "link:src/platform/packages/private/kbn-get-repo-files",
"@kbn/import-locator": "link:packages/kbn-import-locator",
"@kbn/import-resolver": "link:src/platform/packages/private/kbn-import-resolver",
"@kbn/inference-cli": "link:x-pack/platform/packages/shared/kbn-inference-cli",
"@kbn/inventory-e2e": "link:x-pack/solutions/observability/plugins/inventory/e2e",
"@kbn/jest-serializers": "link:src/platform/packages/private/kbn-jest-serializers",
"@kbn/journeys": "link:src/platform/packages/private/kbn-journeys",
"@kbn/json-ast": "link:packages/kbn-json-ast",
"@kbn/kbn-genai-cli": "link:x-pack/solutions/observability/packages/kbn-genai-cli",
"@kbn/kibana-manifest-schema": "link:packages/kbn-kibana-manifest-schema",
"@kbn/lint-packages-cli": "link:packages/kbn-lint-packages-cli",
"@kbn/lint-ts-projects-cli": "link:packages/kbn-lint-ts-projects-cli",

View file

@ -36,7 +36,10 @@ export function supertestToObservable<T = any>(response: supertest.Test): Observ
return new Observable<T>((subscriber) => {
const parser = createParser({
onEvent: (event) => {
subscriber.next(JSON.parse(event.data));
subscriber.next({
type: event.event ?? 'event',
...JSON.parse(event.data),
});
},
});

View file

@ -23,6 +23,13 @@ export class ServerSentEventError<
super(message);
}
public get status() {
if (typeof this.meta === 'object' && this.meta.status) {
return this.meta.status as number;
}
return undefined;
}
toJSON(): ServerSentErrorEvent {
return {
type: ServerSentEventType.error,

View file

@ -1080,6 +1080,8 @@
"@kbn/index-management-shared-types/*": ["x-pack/platform/packages/shared/index-management/index_management_shared_types/*"],
"@kbn/index-patterns-test-plugin": ["src/platform/test/plugin_functional/plugins/index_patterns"],
"@kbn/index-patterns-test-plugin/*": ["src/platform/test/plugin_functional/plugins/index_patterns/*"],
"@kbn/inference-cli": ["x-pack/platform/packages/shared/kbn-inference-cli"],
"@kbn/inference-cli/*": ["x-pack/platform/packages/shared/kbn-inference-cli/*"],
"@kbn/inference-common": ["x-pack/platform/packages/shared/ai-infra/inference-common"],
"@kbn/inference-common/*": ["x-pack/platform/packages/shared/ai-infra/inference-common/*"],
"@kbn/inference-endpoint-plugin": ["x-pack/platform/plugins/shared/inference_endpoint"],
@ -1130,6 +1132,8 @@
"@kbn/json-ast/*": ["packages/kbn-json-ast/*"],
"@kbn/json-schemas": ["x-pack/platform/packages/private/ml/json_schemas"],
"@kbn/json-schemas/*": ["x-pack/platform/packages/private/ml/json_schemas/*"],
"@kbn/kbn-genai-cli": ["x-pack/solutions/observability/packages/kbn-genai-cli"],
"@kbn/kbn-genai-cli/*": ["x-pack/solutions/observability/packages/kbn-genai-cli/*"],
"@kbn/kbn-health-gateway-status-plugin": ["src/platform/test/health_gateway/plugins/status"],
"@kbn/kbn-health-gateway-status-plugin/*": ["src/platform/test/health_gateway/plugins/status/*"],
"@kbn/kbn-sample-panel-action-plugin": ["src/platform/test/plugin_functional/plugins/kbn_sample_panel_action"],
@ -1142,6 +1146,8 @@
"@kbn/kbn-tp-run-pipeline-plugin/*": ["src/platform/test/interpreter_functional/plugins/kbn_tp_run_pipeline/*"],
"@kbn/key-value-metadata-table": ["x-pack/platform/packages/shared/kbn-key-value-metadata-table"],
"@kbn/key-value-metadata-table/*": ["x-pack/platform/packages/shared/kbn-key-value-metadata-table/*"],
"@kbn/kibana-api-cli": ["x-pack/platform/packages/shared/kbn-kibana-api-cli"],
"@kbn/kibana-api-cli/*": ["x-pack/platform/packages/shared/kbn-kibana-api-cli/*"],
"@kbn/kibana-cors-test-plugin": ["x-pack/test/functional_cors/plugins/kibana_cors_test"],
"@kbn/kibana-cors-test-plugin/*": ["x-pack/test/functional_cors/plugins/kibana_cors_test/*"],
"@kbn/kibana-manifest-schema": ["packages/kbn-kibana-manifest-schema"],

View file

@ -120,3 +120,5 @@ export {
InferenceEndpointProvider,
elasticModelIds,
} from './src/inference_endpoints';
export { Tokenizer } from './src/utils/tokenizer';

View file

@ -22,16 +22,19 @@ export enum ChatCompletionEventType {
* the whole text content and potential tool calls of the response.
*/
export type ChatCompletionMessageEvent<TToolOptions extends ToolOptions = ToolOptions> =
InferenceTaskEventBase<ChatCompletionEventType.ChatCompletionMessage> & {
/**
* The text content of the LLM response.
*/
content: string;
/**
* The eventual tool calls performed by the LLM.
*/
toolCalls: ToolCallsOf<TToolOptions>['toolCalls'];
};
InferenceTaskEventBase<
ChatCompletionEventType.ChatCompletionMessage,
{
/**
* The text content of the LLM response.
*/
content: string;
/**
* The eventual tool calls performed by the LLM.
*/
toolCalls: ToolCallsOf<TToolOptions>['toolCalls'];
}
>;
/**
* Represent a partial tool call present in a chunk event.
@ -64,8 +67,9 @@ export interface ChatCompletionChunkToolCall {
* Chunk event, containing a fragment of the total content,
* and potentially chunks of tool calls.
*/
export type ChatCompletionChunkEvent =
InferenceTaskEventBase<ChatCompletionEventType.ChatCompletionChunk> & {
export type ChatCompletionChunkEvent = InferenceTaskEventBase<
ChatCompletionEventType.ChatCompletionChunk,
{
/**
* The content chunk
*/
@ -74,7 +78,8 @@ export type ChatCompletionChunkEvent =
* The tool call chunks
*/
tool_calls: ChatCompletionChunkToolCall[];
};
}
>;
/**
* Token count structure for the chatComplete API.
@ -98,13 +103,15 @@ export interface ChatCompletionTokenCount {
* Token count event, send only once, usually (but not necessarily)
* before the message event
*/
export type ChatCompletionTokenCountEvent =
InferenceTaskEventBase<ChatCompletionEventType.ChatCompletionTokenCount> & {
export type ChatCompletionTokenCountEvent = InferenceTaskEventBase<
ChatCompletionEventType.ChatCompletionTokenCount,
{
/**
* The token count structure
*/
tokens: ChatCompletionTokenCount;
};
}
>;
/**
* Events emitted from the {@link ChatCompleteResponse} observable

View file

@ -5,6 +5,7 @@
* 2.0.
*/
import { ServerSentEventError } from '@kbn/sse-utils';
import { InferenceTaskEventBase, InferenceTaskEventType } from './inference_task';
/**
@ -17,44 +18,22 @@ export enum InferenceTaskErrorCode {
abortedError = 'requestAborted',
}
/**
* Base class for all inference API errors.
*/
export class InferenceTaskError<
const InferenceTaskError = ServerSentEventError;
type InferenceTaskError<
TCode extends string,
TMeta extends Record<string, any> | undefined
> extends Error {
constructor(public code: TCode, message: string, public meta: TMeta) {
super(message);
}
> = ServerSentEventError<TCode, TMeta>;
public get status() {
if (typeof this.meta === 'object' && this.meta.status) {
return this.meta.status as number;
}
return undefined;
}
toJSON(): InferenceTaskErrorEvent {
return {
type: InferenceTaskEventType.error,
error: {
code: this.code,
message: this.message,
meta: this.meta,
},
export type InferenceTaskErrorEvent = InferenceTaskEventBase<
InferenceTaskEventType.error,
{
error: {
code: string;
message: string;
meta?: Record<string, any>;
};
}
}
export type InferenceTaskErrorEvent = InferenceTaskEventBase<InferenceTaskEventType.error> & {
error: {
code: string;
message: string;
meta?: Record<string, any>;
};
};
>;
/**
* Inference error thrown when an unexpected internal error occurs while handling the request.
*/
@ -162,3 +141,5 @@ export function isInferenceRequestAbortedError(error: unknown): error is Inferen
export function isInferenceProviderError(error: unknown): error is InferenceTaskProviderError {
return isInferenceError(error) && error.code === InferenceTaskErrorCode.providerError;
}
export { InferenceTaskError };

View file

@ -4,19 +4,18 @@
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import type { ServerSentEventBase } from '@kbn/sse-utils';
/**
* Base interface for all inference events.
*/
export interface InferenceTaskEventBase<TEventType extends string> {
/**
* Unique identifier of the event type.
*/
type: TEventType;
}
export type InferenceTaskEventBase<
TEventType extends string,
TData extends Record<string, any>
> = ServerSentEventBase<TEventType, TData>;
export enum InferenceTaskEventType {
error = 'error',
}
export type InferenceTaskEvent = InferenceTaskEventBase<string>;
export type InferenceTaskEvent = InferenceTaskEventBase<string, Record<string, unknown>>;

View file

@ -23,8 +23,9 @@ export type Output = Record<string, any> | undefined | unknown;
/**
* Update (chunk) event for the {@link OutputAPI}
*/
export type OutputUpdateEvent<TId extends string = string> =
InferenceTaskEventBase<OutputEventType.OutputUpdate> & {
export type OutputUpdateEvent<TId extends string = string> = InferenceTaskEventBase<
OutputEventType.OutputUpdate,
{
/**
* The id of the operation, as provided as input
*/
@ -33,7 +34,8 @@ export type OutputUpdateEvent<TId extends string = string> =
* The text content of the chunk
*/
content: string;
};
}
>;
/**
* Completion (complete message) event for the {@link OutputAPI}
@ -41,21 +43,24 @@ export type OutputUpdateEvent<TId extends string = string> =
export type OutputCompleteEvent<
TId extends string = string,
TOutput extends Output = Output
> = InferenceTaskEventBase<OutputEventType.OutputComplete> & {
/**
* The id of the operation, as provided as input
*/
id: TId;
/**
* The task output, following the schema specified as input
*/
output: TOutput;
/**
* Potential text content provided by the LLM,
* if it was provided in addition to the tool call
*/
content: string;
};
> = InferenceTaskEventBase<
OutputEventType.OutputComplete,
{
/**
* The id of the operation, as provided as input
*/
id: TId;
/**
* The task output, following the schema specified as input
*/
output: TOutput;
/**
* Potential text content provided by the LLM,
* if it was provided in addition to the tool call
*/
content: string;
}
>;
/**
* Events emitted from the {@link OutputEvent}.

View file

@ -0,0 +1,32 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
/**
 * Rough token accounting for LLM inputs, using the common heuristic of
 * ~4 characters per token. This is an approximation only — it does not use a
 * real tokenizer vocabulary.
 */
export class Tokenizer {
  /**
   * Approximates the number of tokens in a string,
   * assuming 4 characters per token.
   *
   * @param input - the text to measure
   * @returns the approximate token count (rounded up)
   */
  static count(input: string): number {
    return Math.ceil(input.length / 4);
  }

  /**
   * If the text is longer than the given token budget,
   * truncate it and mark it as truncated.
   *
   * @param input - the text to (potentially) truncate
   * @param maxTokens - maximum allowed approximate token count
   * @returns the original (pre-truncation) token count, a truncation flag,
   *          and the resulting text
   */
  static truncate(
    input: string,
    maxTokens: number
  ): { tokens: number; truncated: boolean; text: string } {
    // Compute once and reuse (the original computed the count twice).
    const tokens = Tokenizer.count(input);
    if (tokens > maxTokens) {
      const maxChars = maxTokens * 4;
      return { truncated: true, tokens, text: input.slice(0, maxChars) + '... <truncated>' };
    }
    return { truncated: false, tokens, text: input };
  }
}

View file

@ -16,5 +16,6 @@
"target/**/*"
],
"kbn_references": [
"@kbn/sse-utils",
]
}

View file

@ -0,0 +1,40 @@
# @kbn/inference-cli
Exposes an Inference (plugin) API client for scripts, that mimics the `chatComplete`
and `output` APIs that are available on its start contract. It depends on the KibanaClient
that is exposed from the `@kbn/kibana-api-cli` package. It automatically selects a
connector if available. Usage:
```ts
import { createInferenceClient } from '@kbn/inference-cli';
import { ToolingLog } from '@kbn/tooling-log';
const log = new ToolingLog();
const inferenceClient = await createInferenceClient({
log,
// pass in a signal that is triggered on teardown
signal: new AbortController().signal,
});
const response = await inferenceClient.output({
id: 'extract_personal_details',
input: `Sarah is a 29-year-old software developer living in San Francisco.`,
schema: {
type: 'object',
properties: {
name: { type: 'string' },
age: { type: 'number' },
city: { type: 'string' },
},
required: ['name'],
} as const,
});
log.info(response.output);
```
Running a recipe:
```
$ yarn run ts-node x-pack/solutions/observability/packages/kbn-genai-cli/recipes/hello_world.ts
```

View file

@ -0,0 +1,8 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
export { createInferenceClient } from './src/create_inference_client';
export type { InferenceCliClient } from './src/client';

View file

@ -0,0 +1,12 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
// Jest configuration for the @kbn/inference-cli package.
module.exports = {
  // Node-only preset (no browser/DOM environment).
  preset: '@kbn/test/jest_node',
  // rootDir points at the repository root so the shared preset resolves paths correctly.
  rootDir: '../../../../..',
  // Only run tests found inside this package.
  roots: ['<rootDir>/x-pack/platform/packages/shared/kbn-inference-cli'],
};

View file

@ -0,0 +1,8 @@
{
"type": "shared-common",
"id": "@kbn/inference-cli",
"owner": "@elastic/appex-ai-infra",
"group": "platform",
"visibility": "shared",
"devOnly": true
}

View file

@ -0,0 +1,6 @@
{
"name": "@kbn/inference-cli",
"private": true,
"version": "1.0.0",
"license": "Elastic License 2.0"
}

View file

@ -0,0 +1,140 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import {
BoundChatCompleteAPI,
BoundOutputAPI,
ChatCompleteResponse,
ChatCompletionEvent,
ToolOptions,
UnboundChatCompleteOptions,
UnboundOutputOptions,
} from '@kbn/inference-common';
import { ToolSchemaTypeObject } from '@kbn/inference-common/src/chat_complete/tool_schema';
import { ChatCompleteRequestBody, createOutputApi } from '@kbn/inference-plugin/common';
import { httpResponseIntoObservable } from '@kbn/sse-utils-client';
import { ToolingLog } from '@kbn/tooling-log';
import { defer, from } from 'rxjs';
import { KibanaClient } from '@kbn/kibana-api-cli';
interface InferenceCliClientOptions {
log: ToolingLog;
kibanaClient: KibanaClient;
connectorId: string;
signal: AbortSignal;
}
/**
 * Builds a {@link BoundChatCompleteAPI} implementation that forwards chat
 * completion requests to the Kibana inference plugin's internal HTTP
 * endpoints, bound to a single connector.
 */
function createChatComplete(options: InferenceCliClientOptions): BoundChatCompleteAPI;
function createChatComplete({ connectorId, kibanaClient, signal }: InferenceCliClientOptions) {
  return <TToolOptions extends ToolOptions, TStream extends boolean = false>(
    options: UnboundChatCompleteOptions<TToolOptions, TStream>
  ) => {
    const {
      messages,
      abortSignal,
      maxRetries,
      // underscore-prefixed: metadata is not forwarded in the request body below
      metadata: _metadata,
      modelName,
      retryConfiguration,
      stream,
      system,
      temperature,
      toolChoice,
      tools,
    } = options;
    // JSON request body for the inference plugin's chat_complete endpoints.
    const body: ChatCompleteRequestBody = {
      connectorId,
      messages,
      modelName,
      system,
      temperature,
      toolChoice,
      tools,
      maxRetries,
      // Only a string `retryOn` policy is serializable over HTTP; any other
      // retry configuration shape is dropped here.
      retryConfiguration:
        retryConfiguration && typeof retryConfiguration.retryOn === 'string'
          ? {
              retryOn: retryConfiguration.retryOn,
            }
          : undefined,
    };
    if (stream) {
      // Streaming variant: fetch the SSE endpoint lazily (on subscribe) and
      // turn the raw HTTP response into an observable of completion events.
      return defer(() => {
        return from(
          kibanaClient
            .fetch(`/internal/inference/chat_complete/stream`, {
              method: 'POST',
              body: JSON.stringify(body),
              asRawResponse: true,
              // Abort when either the client-level or the per-call signal fires.
              signal: combineSignal(signal, abortSignal),
            })
            .then((response) => ({ response }))
        );
      }).pipe(httpResponseIntoObservable<ChatCompletionEvent<TToolOptions>>());
    }
    // Non-streaming variant: a single JSON response with the full completion.
    return kibanaClient.fetch<ChatCompleteResponse<TToolOptions>>(
      `/internal/inference/chat_complete`,
      {
        method: 'POST',
        body: JSON.stringify(body),
        signal: combineSignal(signal, abortSignal),
      }
    );
  };
}
/**
 * Combines two abort signals into one: the returned signal aborts as soon as
 * either input aborts. If `right` is not provided, `left` is returned as-is.
 */
function combineSignal(left: AbortSignal, right?: AbortSignal) {
  if (!right) {
    return left;
  }
  const controller = new AbortController();
  const abort = () => controller.abort();
  // Bug fix: an 'abort' event listener never fires on a signal that is
  // already aborted, so that case must be handled eagerly.
  if (left.aborted || right.aborted) {
    controller.abort();
  } else {
    left.addEventListener('abort', abort, { once: true });
    right.addEventListener('abort', abort, { once: true });
  }
  return controller.signal;
}
/**
 * Inference API client for CLI scripts. Mirrors the `chatComplete` and
 * `output` APIs of the inference plugin's start contract, bound to a single
 * connector, and talks to Kibana over HTTP via the supplied KibanaClient.
 */
export class InferenceCliClient {
  private readonly boundChatCompleteAPI: BoundChatCompleteAPI;
  private readonly boundOutputAPI: BoundOutputAPI;

  constructor(options: InferenceCliClientOptions) {
    this.boundChatCompleteAPI = createChatComplete(options);
    // The output API is layered on top of chatComplete, so it reuses the same
    // HTTP transport and connector binding.
    const outputAPI = createOutputApi(this.boundChatCompleteAPI);
    this.boundOutputAPI = <
      TId extends string,
      TOutputSchema extends ToolSchemaTypeObject | undefined,
      TStream extends boolean = false
    >(
      outputOptions: UnboundOutputOptions<TId, TOutputSchema, TStream>
    ) => {
      options.log.debug(`Running task ${outputOptions.id}`);
      return outputAPI({
        ...outputOptions,
        connectorId: options.connectorId,
        // Combine the client-level teardown signal with any per-call signal.
        abortSignal: combineSignal(options.signal, outputOptions.abortSignal),
      });
    };
  }

  // Arrow-function properties keep `this` bound when the methods are passed around.
  chatComplete: BoundChatCompleteAPI = (options) => {
    return this.boundChatCompleteAPI(options);
  };

  output: BoundOutputAPI = (options) => {
    return this.boundOutputAPI(options);
  };
}

View file

@ -0,0 +1,34 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { ToolingLog } from '@kbn/tooling-log';
import { KibanaClient, createKibanaClient } from '@kbn/kibana-api-cli';
import { InferenceCliClient } from './client';
import { selectConnector } from './select_connector';
/**
 * Creates an {@link InferenceCliClient} for use in scripts.
 *
 * Discovers a local Kibana instance when no `kibanaClient` is supplied, then
 * selects a connector (interactively when `prompt` is enabled) and binds the
 * returned client to it.
 *
 * @param log - tooling logger for progress/debug output
 * @param prompt - whether connector selection may prompt interactively
 * @param signal - abort signal triggered on script teardown
 * @param kibanaClient - optional pre-built Kibana API client
 */
export async function createInferenceClient({
  log,
  prompt,
  signal,
  kibanaClient,
}: {
  log: ToolingLog;
  prompt?: boolean;
  signal: AbortSignal;
  kibanaClient?: KibanaClient;
}): Promise<InferenceCliClient> {
  // Use ?? instead of || (only fall back when the argument is nullish) and
  // avoid reassigning the parameter.
  const client = kibanaClient ?? (await createKibanaClient({ log, signal }));
  const connector = await selectConnector({ log, kibanaClient: client, prompt });
  return new InferenceCliClient({
    log,
    kibanaClient: client,
    connectorId: connector.connectorId,
    signal,
  });
}

View file

@ -0,0 +1,17 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { InferenceConnector } from '@kbn/inference-common';
import { KibanaClient } from '@kbn/kibana-api-cli';
/**
 * Fetches the list of inference-capable connectors from the inference
 * plugin's internal connectors endpoint.
 */
export async function getConnectors(kibanaClient: KibanaClient): Promise<InferenceConnector[]> {
  const response = await kibanaClient.fetch<{ connectors: InferenceConnector[] }>(
    '/internal/inference/connectors'
  );
  return response.connectors;
}

View file

@ -0,0 +1,67 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { ToolingLog } from '@kbn/tooling-log';
import inquirer from 'inquirer';
import { InferenceConnector } from '@kbn/inference-common';
import { KibanaClient } from '@kbn/kibana-api-cli';
import { getConnectors } from './get_connector';
/**
 * Selects an inference connector.
 *
 * Resolution order: the preferred connector when found, the only connector
 * when exactly one exists, an interactive choice when `prompt` is enabled,
 * and otherwise the first connector in the list.
 *
 * @throws when no connectors are configured at all
 */
export async function selectConnector({
  log,
  kibanaClient,
  prompt = true,
  preferredConnectorId,
}: {
  log: ToolingLog;
  kibanaClient: KibanaClient;
  prompt?: boolean;
  preferredConnectorId?: string;
}): Promise<InferenceConnector> {
  const connectors = await getConnectors(kibanaClient);
  if (!connectors.length) {
    throw new Error(
      `No connectors available for inference. See https://www.elastic.co/guide/en/kibana/current/action-types.html`
    );
  }
  const connector = connectors.find((item) => item.connectorId === preferredConnectorId);
  // Bug fix: the original found the preferred connector but never returned it,
  // silently falling through to prompting / the first connector.
  if (connector) {
    return connector;
  }
  if (preferredConnectorId) {
    log.warning(`Could not find connector ${preferredConnectorId}`);
  }
  const firstConnector = connectors[0];
  if (connectors.length === 1) {
    log.debug('Using the only connector found');
    return firstConnector;
  }
  if (prompt) {
    const connectorChoice = await inquirer.prompt({
      type: 'list',
      name: 'connector',
      message: `Select a connector`,
      choices: connectors.map((item) => ({
        name: `${item.name} (${item.connectorId})`,
        value: item.connectorId,
      })),
    });
    // The chosen value always comes from the list above, so find() cannot miss.
    const selectedConnector = connectors.find(
      (item) => item.connectorId === connectorChoice.connector
    )!;
    return selectedConnector;
  }
  return firstConnector;
}

View file

@ -0,0 +1,23 @@
{
"extends": "../../../../../tsconfig.base.json",
"compilerOptions": {
"outDir": "target/types",
"types": [
"jest",
"node"
]
},
"include": [
"**/*.ts",
],
"exclude": [
"target/**/*"
],
"kbn_references": [
"@kbn/inference-common",
"@kbn/inference-plugin",
"@kbn/sse-utils-client",
"@kbn/tooling-log",
"@kbn/kibana-api-cli",
]
}

View file

@ -0,0 +1,25 @@
# @kbn/kibana-api-cli
Exposes a Kibana API client for usage in scripts. It:
- attempts to automatically discover Kibana, using localhost and some common credentials
- sets the right headers to be able to talk to the API
- exposes an Elasticsearch client that uses the /api/console/proxy endpoint
Usage:
```ts
import { ToolingLog } from '@kbn/tooling-log';
import { InferenceConnector } from '@kbn/inference-common';
export async function getConnectors({ log, signal }: { log: ToolingLog; signal: AbortSignal }): Promise<InferenceConnector[]> {
  const kibanaClient = await createKibanaClient({ log, signal });
const { connectors } = await kibanaClient.fetch<{
connectors: InferenceConnector[];
}>('/internal/inference/connectors');
return connectors;
}
```

View file

@ -0,0 +1,11 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
export { discoverKibanaUrl } from './src/discover_kibana_url';
export { KibanaClient } from './src/client';
export { createKibanaClient } from './src/create_kibana_client';
export { FetchResponseError } from './src/kibana_fetch_response_error';

View file

@ -0,0 +1,12 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
// Jest configuration for the @kbn/kibana-api-cli package.
module.exports = {
  // Node-only preset (no browser/DOM environment).
  preset: '@kbn/test/jest_node',
  // rootDir points at the repository root so the shared preset resolves paths correctly.
  rootDir: '../../../../..',
  // Only run tests found inside this package.
  roots: ['<rootDir>/x-pack/platform/packages/shared/kbn-kibana-api-cli'],
};

View file

@ -0,0 +1,7 @@
{
"type": "shared-common",
"id": "@kbn/kibana-api-cli",
"owner": "@elastic/appex-ai-infra",
"group": "platform",
"visibility": "shared"
}

View file

@ -0,0 +1,6 @@
{
"name": "@kbn/kibana-api-cli",
"private": true,
"version": "1.0.0",
"license": "Elastic License 2.0"
}

View file

@ -0,0 +1,122 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { Client } from '@elastic/elasticsearch';
import { compact } from 'lodash';
import { format, parse } from 'node:url';
import Path from 'path';
import { UrlWithParsedQuery } from 'url';
import { FetchResponseError } from './kibana_fetch_response_error';
import { createProxyTransport } from './proxy_transport';
import { getInternalKibanaHeaders } from './get_internal_kibana_headers';
type FetchInputOptions = string | URL;
type FetchInitOptions = globalThis.RequestInit;
interface KibanaClientOptions {
baseUrl: string;
spaceId?: string;
signal: AbortSignal;
}
/**
 * Combines two abort signals into one: the returned signal aborts as soon as
 * either input aborts. If `right` is nullish, `left` is returned as-is.
 */
function combineSignal(left: AbortSignal, right?: AbortSignal | null | undefined) {
  if (!right) {
    return left;
  }
  const controller = new AbortController();
  const abort = () => controller.abort();
  // Bug fix: an 'abort' event listener never fires on a signal that is
  // already aborted, so that case must be handled eagerly.
  if (left.aborted || right.aborted) {
    controller.abort();
  } else {
    left.addEventListener('abort', abort, { once: true });
    right.addEventListener('abort', abort, { once: true });
  }
  return controller.signal;
}
/**
 * HTTP client for a Kibana instance, for use in scripts.
 *
 * Exposes:
 * - `es`: an Elasticsearch client that tunnels through Kibana via a proxy
 *   transport (presumably the /api/console/proxy endpoint — see package README);
 * - `fetch`: a thin wrapper over global fetch that prefixes the Kibana base
 *   URL (including space), adds internal headers and basic auth, and parses
 *   JSON responses.
 */
export class KibanaClient {
  public readonly es: Client;

  constructor(private readonly options: KibanaClientOptions) {
    // baseUrl is expected to embed credentials, e.g. http://user:pass@host:5601
    const parsedBaseUrl = parse(options.baseUrl, true);
    const [username, password] = (parsedBaseUrl.auth ?? '').split(':');
    // Strip auth and path: the ES client gets credentials separately, and the
    // proxy transport re-applies the Kibana base pathname.
    const node = format({
      ...parsedBaseUrl,
      auth: null,
      pathname: null,
    });
    this.es = new Client({
      auth: {
        username,
        password,
      },
      node,
      Transport: createProxyTransport({
        pathname: parsedBaseUrl.pathname!,
        headers: getInternalKibanaHeaders(),
      }),
    });
  }

  // Overload: asRawResponse returns the raw Response instead of parsed JSON.
  fetch(
    options: FetchInputOptions,
    init: FetchInitOptions & { asRawResponse: true }
  ): Promise<Response>;
  fetch<T>(options: FetchInputOptions, init?: FetchInitOptions): Promise<T>;
  async fetch<T>(
    options: FetchInputOptions,
    init?: FetchInitOptions & { asRawResponse?: boolean }
  ): Promise<T | Response> {
    // A plain string is treated as a pathname relative to the Kibana base URL.
    const urlObject =
      typeof options === 'string'
        ? {
            pathname: options,
          }
        : options;
    const formattedBaseUrl = parse(this.options.baseUrl, true);
    const urlOptions: UrlWithParsedQuery = {
      ...formattedBaseUrl,
      ...urlObject,
      // Join base path + optional space prefix (/s/<spaceId>) + request path;
      // compact() drops undefined segments.
      pathname: Path.posix.join(
        ...compact([
          '/',
          formattedBaseUrl.pathname,
          ...(this.options.spaceId ? ['s', this.options.spaceId] : []),
          urlObject.pathname,
        ])
      ),
      // Credentials are sent via the Authorization header, not in the URL.
      auth: null,
    };
    const response = await fetch(format(urlOptions), {
      ...init,
      headers: {
        ...getInternalKibanaHeaders(),
        Authorization: `Basic ${Buffer.from(formattedBaseUrl.auth!).toString('base64')}`,
        // Caller-provided headers win over the defaults above.
        ...init?.headers,
      },
      // Abort when either the client-level or the per-call signal fires.
      signal: combineSignal(this.options.signal, init?.signal),
    });
    if (init?.asRawResponse) {
      // NOTE(review): raw responses skip the >= 400 status check below —
      // callers must handle HTTP errors themselves.
      return response;
    }
    if (response.status >= 400) {
      throw new FetchResponseError(response);
    }
    return response.json() as Promise<T>;
  }
}

View file

@ -0,0 +1,23 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { ToolingLog } from '@kbn/tooling-log';
import { discoverKibanaUrl } from './discover_kibana_url';
import { KibanaClient } from './client';
/**
 * Discovers a locally running Kibana instance and returns a
 * {@link KibanaClient} bound to it.
 *
 * @param log - tooling logger for discovery progress output
 * @param signal - abort signal propagated to all client requests
 */
export async function createKibanaClient({
  log,
  signal,
}: {
  log: ToolingLog;
  signal: AbortSignal;
}) {
  const discoveredUrl = await discoverKibanaUrl({ log });
  return new KibanaClient({ baseUrl: discoveredUrl, signal });
}

View file

@ -0,0 +1,143 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { ToolingLog } from '@kbn/tooling-log';
import { omit } from 'lodash';
import fetch from 'node-fetch';
import { format, parse, Url } from 'url';
import { getInternalKibanaHeaders } from './get_internal_kibana_headers';
/**
 * Tries a list of common development credentials against the target URL and
 * returns the first `user:pass` pair that yields an HTTP 200.
 *
 * @throws when none of the candidate credentials authenticate
 */
async function discoverAuth(parsedTarget: Url, log: ToolingLog) {
  const candidates = [`elastic:changeme`, `admin:changeme`];

  for (const auth of candidates) {
    const url = format({
      ...parsedTarget,
      auth,
    });

    // Network failures count as "not authenticated" and move on to the
    // next candidate.
    let statusCode = 0;
    try {
      log.debug(`Fetching ${url}`);
      const response = await fetch(url, {
        headers: getInternalKibanaHeaders(),
      });
      statusCode = response.status;
    } catch (err) {
      log.debug(`${url} resulted in ${err.message}`);
    }

    if (statusCode === 200) {
      return auth;
    }
  }

  throw new Error(`Failed to authenticate user for ${format(parsedTarget)}`);
}
/**
 * Resolves the canonical Kibana API URL for the given base URL by inspecting
 * a single redirect (e.g. into a space) and verifying that the discovered
 * URL responds with HTTP 200.
 *
 * Returns the discovered URL with basic auth embedded and any trailing slash
 * removed. Throws when Kibana cannot be reached or does not respond with 200.
 */
async function getKibanaApiUrl({ baseUrl, log }: { baseUrl: string; log: ToolingLog }) {
  try {
    const isCI = process.env.CI?.toLowerCase() === 'true';

    const parsedKibanaUrl = parse(baseUrl);
    // Strip auth from the URL itself; credentials are sent via the
    // Authorization header instead.
    const kibanaUrlWithoutAuth = format(omit(parsedKibanaUrl, 'auth'));

    log.debug(`Checking Kibana URL ${kibanaUrlWithoutAuth} for a redirect`);

    const headers = {
      ...getInternalKibanaHeaders(),
      ...(parsedKibanaUrl.auth
        ? { Authorization: `Basic ${Buffer.from(parsedKibanaUrl.auth).toString('base64')}` }
        : {}),
    };

    // HEAD request without following redirects, so the `location` header can
    // be inspected manually.
    const unredirectedResponse = await fetch(kibanaUrlWithoutAuth, {
      headers,
      method: 'HEAD',
      follow: 1,
      redirect: 'manual',
    });

    log.debug('Unredirected response', unredirectedResponse.headers.get('location'));

    // Strip space-selector path segments from the redirect target; fall back
    // to the original URL when there was no redirect.
    // NOTE(review): the two replace() patterns differ in leading slash
    // ('/spaces/enter' vs 'spaces/space_selector') — confirm this is intended.
    const discoveredKibanaUrl =
      unredirectedResponse.headers
        .get('location')
        ?.replace('/spaces/enter', '')
        ?.replace('spaces/space_selector', '') || kibanaUrlWithoutAuth;

    log.debug(`Discovered Kibana URL at ${discoveredKibanaUrl}`);

    const parsedTarget = parse(baseUrl);
    const parsedDiscoveredUrl = parse(discoveredKibanaUrl);

    // Re-attach the original credentials to the discovered URL.
    const discoveredKibanaUrlWithAuth = format({
      ...parsedDiscoveredUrl,
      auth: parsedTarget.auth,
    });

    // Verify the discovered URL actually responds OK before returning it.
    const redirectedResponse = await fetch(discoveredKibanaUrlWithAuth, {
      method: 'HEAD',
      headers,
    });

    if (redirectedResponse.status !== 200) {
      throw new Error(
        `Expected HTTP 200 from ${discoveredKibanaUrlWithAuth}, got ${redirectedResponse.status}`
      );
    }

    const discoveredKibanaUrlWithoutAuth = format({
      ...parsedDiscoveredUrl,
      auth: undefined,
    });

    // Avoid printing credentials in CI logs.
    log.info(
      `Discovered kibana running at: ${
        isCI ? discoveredKibanaUrlWithoutAuth : discoveredKibanaUrlWithAuth
      }`
    );

    return discoveredKibanaUrlWithAuth.replace(/\/$/, '');
  } catch (error) {
    throw new Error(`Could not connect to Kibana: ` + error.message);
  }
}
/**
 * Discovers the URL (including basic auth) of a running Kibana instance.
 *
 * Credentials are taken from, in order of precedence: the explicit `auth`
 * option, credentials embedded in `baseUrl`, or well-known defaults probed
 * via {@link discoverAuth}.
 */
export async function discoverKibanaUrl({
  baseUrl,
  log,
  auth,
}: {
  baseUrl?: string;
  log: ToolingLog;
  auth?: { basic: { username: string; password: string } };
}) {
  const targetUrl = baseUrl ?? 'http://127.0.0.1:5601';
  const parsedTarget = parse(targetUrl);

  let authToUse = auth?.basic ? `${auth.basic.username}:${auth.basic.password}` : parsedTarget.auth;
  if (!authToUse) {
    // No credentials supplied anywhere — probe the defaults.
    authToUse = await discoverAuth(parsedTarget, log);
  }

  const kibanaUrlWithAuth = format({
    ...parse(targetUrl),
    auth: authToUse,
  });

  // Validate connectivity and resolve redirects before handing the URL back.
  return await getKibanaApiUrl({ baseUrl: kibanaUrlWithAuth, log });
}

View file

@ -0,0 +1,13 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
/**
 * Returns the HTTP headers that mark a request as an internal Kibana
 * request: the XSRF header and the internal-origin header.
 */
export function getInternalKibanaHeaders() {
  const internalHeaders = {
    'kbn-xsrf': 'true',
    'x-elastic-internal-origin': 'kibana',
  };
  return internalHeaders;
}

View file

@ -0,0 +1,13 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
/**
 * Error thrown when a Kibana API call responds with an HTTP error status.
 * Exposes the raw `Response` so callers can inspect the status code and
 * read the response body for details.
 */
export class FetchResponseError extends Error {
  constructor(public readonly response: globalThis.Response) {
    // Use the HTTP status text (e.g. "Not Found") as the error message.
    super(response.statusText);
    this.name = 'FetchResponseError';
  }
}

View file

@ -0,0 +1,93 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import Path from 'path';
import {
Transport,
TransportOptions,
TransportRequestOptions,
TransportRequestOptionsWithMeta,
TransportRequestOptionsWithOutMeta,
TransportRequestParams,
TransportResult,
errors,
} from '@elastic/elasticsearch';
/**
 * Creates a `Transport` subclass that routes every Elasticsearch request
 * through Kibana's Console proxy endpoint (`{pathname}/api/console/proxy`),
 * so Elasticsearch can be reached with Kibana connectivity/credentials only.
 *
 * Note: the original implementation declared a constructor that only
 * forwarded to `super` — it was a no-op and has been removed.
 *
 * @param pathname base path of the Kibana instance to proxy through
 * @param headers  extra headers merged into every proxied request
 */
export function createProxyTransport({
  pathname,
  headers,
}: {
  pathname: string;
  headers?: Record<string, any>;
}): typeof Transport {
  return class ProxyTransport extends Transport {
    request<TResponse = unknown>(
      params: TransportRequestParams,
      options?: TransportRequestOptionsWithOutMeta | TransportRequestOptions
    ): Promise<TResponse>;

    request<TResponse = unknown, TContext = any>(
      params: TransportRequestParams,
      options?: TransportRequestOptionsWithMeta
    ): Promise<TransportResult<TResponse, TContext>>;

    request(params: TransportRequestParams, options?: TransportRequestOptions) {
      const queryParams = new URLSearchParams(params.querystring);

      // Rewrite the request into a POST to the Console proxy, encoding the
      // original method and path (incl. querystring) as proxy parameters.
      const next: TransportRequestParams = {
        ...params,
        method: 'POST',
        path: Path.posix.join(pathname, '/api/console/proxy'),
        querystring: {
          path: `${params.path}?${queryParams.toString()}`,
          method: params.method,
        },
      };

      const esHeaders = {
        ...options?.headers,
        ...headers,
      };

      return super
        .request(next, {
          ...options,
          headers: esHeaders,
          meta: true,
        })
        .catch((error) => {
          // the client will throw a ProductNotSupportedError when ES returns a 403
          // and the proxy messes with the headers
          if (error instanceof errors.ElasticsearchClientError) {
            if ('meta' in error) {
              const meta = error.meta as errors.ResponseError['meta'];
              // Prefer the real status code the proxy forwards in a header.
              throw new errors.ResponseError({
                ...meta,
                statusCode: Number(
                  meta.headers?.['x-console-proxy-status-code'] ?? meta.statusCode
                ),
              });
            }
          }
          throw error;
        })
        .then((response) => {
          // Surface proxied HTTP errors (status >= 400) as a ResponseError.
          if (response.statusCode >= 400) {
            throw new errors.ResponseError({
              statusCode: response.statusCode,
              body: response.body,
              meta: response.meta,
              warnings: response.warnings,
              headers: response.headers,
            });
          }
          // Honor the caller's meta option: full TransportResult vs body only.
          return options?.meta ? response : response.body;
        });
    }
  };
}

View file

@ -0,0 +1,19 @@
{
"extends": "../../../../../tsconfig.base.json",
"compilerOptions": {
"outDir": "target/types",
"types": [
"jest",
"node"
]
},
"include": [
"**/*.ts",
],
"exclude": [
"target/**/*"
],
"kbn_references": [
"@kbn/tooling-log",
]
}

View file

@ -14,12 +14,15 @@ import type {
export type ChatCompleteRequestBody = {
connectorId: string;
stream?: boolean;
system?: string;
temperature?: number;
modelName?: string;
messages: Message[];
functionCalling?: FunctionCallingMode;
maxRetries?: number;
retryConfiguration?: {
retryOn?: 'all' | 'auto';
};
} & ToolOptions;
export interface GetConnectorsResponseBody {

View file

@ -20,10 +20,20 @@ import {
InferenceTaskEventType,
isInferenceError,
} from '@kbn/inference-common';
import { observableIntoEventSourceStream } from '@kbn/sse-utils-server';
import type { ChatCompleteRequestBody } from '../../common/http_apis';
import { createClient as createInferenceClient } from '../inference_client';
import { InferenceServerStart, InferenceStartDependencies } from '../types';
import { observableIntoEventSourceStream } from '../util/observable_into_event_source_stream';
function getRequestAbortedSignal(request: KibanaRequest) {
const controller = new AbortController();
request.events.aborted$.subscribe({
complete: () => {
controller.abort();
},
});
return controller.signal;
}
const toolCallSchema: Type<ToolCall[]> = schema.arrayOf(
schema.object({
@ -38,6 +48,12 @@ const toolCallSchema: Type<ToolCall[]> = schema.arrayOf(
const chatCompleteBodySchema: Type<ChatCompleteRequestBody> = schema.object({
connectorId: schema.string(),
system: schema.maybe(schema.string()),
maxRetries: schema.maybe(schema.number()),
retryConfiguration: schema.maybe(
schema.object({
retryOn: schema.maybe(schema.oneOf([schema.literal('all'), schema.literal('auto')])),
})
),
tools: schema.maybe(
schema.recordOf(
schema.string(),
@ -116,7 +132,18 @@ export function registerChatCompleteRoute({
const client = createInferenceClient({ request, actions, logger });
const { connectorId, messages, system, toolChoice, tools, functionCalling } = request.body;
const {
connectorId,
messages,
system,
toolChoice,
tools,
functionCalling,
maxRetries,
modelName,
retryConfiguration,
temperature,
} = request.body;
return client.chatComplete({
connectorId,
@ -127,6 +154,10 @@ export function registerChatCompleteRoute({
functionCalling,
stream,
abortSignal: abortController.signal,
maxRetries,
modelName,
retryConfiguration,
temperature,
});
}
@ -179,7 +210,10 @@ export function registerChatCompleteRoute({
async (context, request, response) => {
const chatCompleteEvents$ = await callChatComplete({ request, stream: true });
return response.ok({
body: observableIntoEventSourceStream(chatCompleteEvents$, logger),
body: observableIntoEventSourceStream(chatCompleteEvents$, {
logger,
signal: getRequestAbortedSignal(request),
}),
});
}
);

View file

@ -36,6 +36,7 @@
"@kbn/es-types",
"@kbn/field-types",
"@kbn/expressions-plugin",
"@kbn/inference-langchain"
"@kbn/inference-langchain",
"@kbn/sse-utils-server"
]
}

View file

@ -6,6 +6,7 @@
*/
import { i18n } from '@kbn/i18n';
import { ServerSentEventBase } from '@kbn/sse-utils';
import { type Message } from './types';
export enum StreamingChatResponseEventType {
@ -18,27 +19,19 @@ export enum StreamingChatResponseEventType {
BufferFlush = 'bufferFlush',
}
type StreamingChatResponseEventBase<
TEventType extends StreamingChatResponseEventType,
TData extends {}
> = {
type: TEventType;
} & TData;
type BaseChatCompletionEvent<TType extends StreamingChatResponseEventType> =
StreamingChatResponseEventBase<
TType,
{
id: string;
message: {
content?: string;
function_call?: {
name?: string;
arguments?: string;
};
type BaseChatCompletionEvent<TType extends StreamingChatResponseEventType> = ServerSentEventBase<
TType,
{
id: string;
message: {
content?: string;
function_call?: {
name?: string;
arguments?: string;
};
}
>;
};
}
>;
export type ChatCompletionChunkEvent =
BaseChatCompletionEvent<StreamingChatResponseEventType.ChatCompletionChunk>;
@ -46,7 +39,7 @@ export type ChatCompletionChunkEvent =
export type ChatCompletionMessageEvent =
BaseChatCompletionEvent<StreamingChatResponseEventType.ChatCompletionMessage>;
export type ConversationCreateEvent = StreamingChatResponseEventBase<
export type ConversationCreateEvent = ServerSentEventBase<
StreamingChatResponseEventType.ConversationCreate,
{
conversation: {
@ -57,7 +50,7 @@ export type ConversationCreateEvent = StreamingChatResponseEventBase<
}
>;
export type ConversationUpdateEvent = StreamingChatResponseEventBase<
export type ConversationUpdateEvent = ServerSentEventBase<
StreamingChatResponseEventType.ConversationUpdate,
{
conversation: {
@ -68,12 +61,12 @@ export type ConversationUpdateEvent = StreamingChatResponseEventBase<
}
>;
export type MessageAddEvent = StreamingChatResponseEventBase<
export type MessageAddEvent = ServerSentEventBase<
StreamingChatResponseEventType.MessageAdd,
{ message: Message; id: string }
>;
export type ChatCompletionErrorEvent = StreamingChatResponseEventBase<
export type ChatCompletionErrorEvent = ServerSentEventBase<
StreamingChatResponseEventType.ChatCompletionError,
{
error: {
@ -85,7 +78,7 @@ export type ChatCompletionErrorEvent = StreamingChatResponseEventBase<
}
>;
export type BufferFlushEvent = StreamingChatResponseEventBase<
export type BufferFlushEvent = ServerSentEventBase<
StreamingChatResponseEventType.BufferFlush,
{
data?: string;

View file

@ -52,7 +52,8 @@
"@kbn/inference-plugin",
"@kbn/ai-assistant-icon",
"@kbn/security-authorization-core-common",
"@kbn/core-http-browser"
"@kbn/core-http-browser",
"@kbn/sse-utils"
],
"exclude": ["target/**/*"]
}

View file

@ -0,0 +1,15 @@
# @kbn/genai-cli
A collection of GenAI scripts, for both practical and educational purposes.
Usage:
```ts
import { runRecipe } from '../utils/run_recipe';
runRecipe(async ({ inferenceClient, kibanaClient, log, signal }) => {
const output = await inferenceClient.output({});
log.info(output);
});
```

View file

@ -0,0 +1,12 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
// Jest configuration for the @kbn/genai-cli package: uses the repo-wide
// node preset and scopes test discovery to this package's directory.
module.exports = {
  preset: '@kbn/test/jest_node',
  rootDir: '../../../../..',
  roots: ['<rootDir>/x-pack/solutions/observability/packages/kbn-genai-cli'],
};

View file

@ -0,0 +1,8 @@
{
"type": "shared-common",
"id": "@kbn/kbn-genai-cli",
"owner": "@elastic/obs-knowledge-team",
"group": "observability",
"visibility": "private",
"devOnly": true
}

View file

@ -0,0 +1,6 @@
{
"name": "@kbn/kbn-genai-cli",
"private": true,
"version": "1.0.0",
"license": "Elastic License 2.0"
}

View file

@ -0,0 +1,315 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { Tokenizer } from '@kbn/inference-common';
import { ShortIdTable } from '@kbn/observability-utils-common/llm/short_id_table';
import { chunk, partition } from 'lodash';
import pLimit from 'p-limit';
import { FetchResponseError } from '@kbn/kibana-api-cli';
import { runRecipe } from '../utils/run_recipe';
/**
* This recipe classifies prompts from user telemetry for the
* Observability AI Assistant. This telemetry is no longer
* recorded, so it's here for educational purposes.
*/
// Fix: the bulk-indexing progress logs previously reported
// `batches.length` (the inference batch count) instead of
// `bulkBatches.length` (the bulk batch count).
runRecipe(async ({ inferenceClient, kibanaClient, log, signal }) => {
  // this assumes the schema of `ebt-kibana-server` in the telemetry cluster.
  // two fields used are `timestamp` and `properties.prompt`.
  const SOURCE_INDEX = ':INVALID_INDEX_NAME';
  const TARGET_INDEX = SOURCE_INDEX;

  // Pull the most recent prompts from the telemetry index.
  const response = await kibanaClient.es.search<{ properties: { prompt: string } }>({
    index: SOURCE_INDEX,
    track_total_hits: true,
    size: 10000,
    sort: {
      timestamp: {
        order: 'desc',
      },
    },
  });

  log.info(`Downloaded ${response.hits.hits.length} documents`);

  // Short ids keep the LLM payload compact and unambiguous.
  const table = new ShortIdTable();

  const itemsToProcess = response.hits.hits.map((hit) => {
    let text = hit._source!.properties.prompt.trim();
    const id = hit._id!;

    let data: undefined | any[] | Record<string, unknown>;

    // screen descriptions are added to the prompt. this
    // regex accounts for changes in different versions
    // in some cases `prompt` is structured data.
    const [prompt, ...screenDescriptions] = text.split(
      /((\|)|(\\n\\n)|(\s*))(The user is looking at|The user has APM data)/
    );

    const screenDescription = screenDescriptions.join('');

    if (screenDescription) {
      text = prompt.trim();
    }

    // if it's structured data, parse it.
    try {
      if (text.startsWith('[') && text.endsWith(']')) {
        data = JSON.parse(text);
      } else if (text.startsWith('{') && text.endsWith('}')) {
        data = JSON.parse(text);
      } else if (text.startsWith('{')) {
        const dataText = text.split('}|')[0] + '}';
        data = JSON.parse(dataText);
      }
    } catch (err) {
      // sometimes folks parse in JSON responses from Dev Tools w/ multiline etc which fails to parse
    }

    // everything after /app/ are the relevant parts of the URL
    const [, ...urlParts] = screenDescription.split('/app/');
    let url = urlParts.join('/app/');

    // take until the dot
    url = url.split(/(\.|\s|$|\n)/)[0];

    const app = url ? url.split('/')[0] : undefined;

    let type = 'prompt';
    if (data && 'instructions' in data) {
      type = 'contextual_insight';
    } else if (data && 'connectors' in data) {
      type = 'connector';
    }

    return {
      _id: id,
      shortId: table.take(id),
      screen: screenDescription
        ? {
            description: screenDescription ? `The user is looking at ${screenDescription}` : '',
            url,
            app,
          }
        : null,
      type,
      original: text,
      prompt: data ? null : prompt.trim(),
      truncated: Tokenizer.truncate(prompt, 500),
      _index: hit._index!,
      _source: hit._source,
    };
  });

  const itemsWithPrompts = itemsToProcess.filter((item) => !!item.prompt);

  // limit to 25 prompts per call, to prevent output tokens from exceeding the limit
  const batches = chunk(itemsWithPrompts, 25);

  log.info(`Processing ${itemsWithPrompts.length} prompts in ${batches.length} batches`);

  const limiter = pLimit(20);

  let finished = 0;
  let failures = 0;

  const logProgressIntervalId = setInterval(() => {
    log.info(`Progress: completed ${finished} out of ${batches.length} total, ${failures} failed`);
  }, 5000);

  signal.addEventListener('abort', () => {
    clearInterval(logProgressIntervalId);
  });

  const results = await Promise.allSettled(
    batches.map((batch, index) => {
      const input = `Classify the given prompt. It's from a user of the
      Observability AI Assistant. Tag each prompt with one or more
      topics. Use the following tags if applicable:

      - \`query\`: the user asks questions about their data, such as:
        - "Show me my zookeeper logs over the last 15 minutes"
        - "Give me the slowest transactions"
        - "Generate an ES|QL query that lists all the services"
      - \`es\`: the user asks questions about their Elasticsearch
      instance, such as:
        - "What is the status of my cluster"
      - \`product-help\`: the user asks for help with the Elastic
      products, such as:
        - "How do I install an ingest pipeline"
      - \`screen\`: the user asks questions about the things
      that are on their screen (they are looking at Kibana), such as:
        - "Explain this dashboard" when they are on /app/dashboards
        - "What does this error mean" when they are on /app/apm/{serviceName}/errors
      - \`rca\`: the user wants the assistant to explain why
      something is happening, such as:
        - "What causes this alert?"
        - "What caused this error?"
      - \`signals\`: the user wants to retrieve signals, such as alerts,
      anomalies or SLO/SLIs

      If you see other topics, generate a new tag, using lowercase
      and alphanumerical characters.

      Additionally, tag the language using ISO 6391, prefixed by \`lang\`,
      such as: \`lang-en\`

      ## Prompts

      ${JSON.stringify(
        batch.map((prompt) => ({
          id: prompt.shortId,
          text: prompt.truncated,
          url: prompt.screen?.url ?? 'unknown',
        }))
      )}
      `;

      return limiter(() => {
        log.debug(`inference: started batch ${index} out of ${batches.length}`);
        return inferenceClient.output({
          id: 'classify_prompts',
          input,
          schema: {
            type: 'object',
            properties: {
              prompts: {
                type: 'array',
                items: {
                  type: 'object',
                  properties: {
                    id: {
                      type: 'string',
                    },
                    tags: {
                      type: 'array',
                      items: {
                        type: 'string',
                      },
                    },
                  },
                  required: ['id', 'tags'],
                },
              },
            },
            required: ['prompts'],
          } as const,
        });
      })
        .catch((error) => {
          failures++;
          if (error instanceof FetchResponseError) {
            // Include the response body in the log for easier debugging.
            return error.response.json().then((res) => {
              log.warning(
                `Failed to get results: "${error.message}" with ${JSON.stringify(
                  res
                )}, input token count was ${Tokenizer.count(input)}`
              );
              throw error;
            });
          }
          log.warning(`Failed to get results: ${error.message}`);
          throw error;
        })
        .finally(() => {
          finished++;
          log.debug(`inference: settled batch ${index} out of ${batches.length}`);
        });
    })
  );

  clearInterval(logProgressIntervalId);

  const tagsByShortId = new Map<string, string[]>();

  results.forEach((result) => {
    if (result.status === 'rejected') {
      return;
    }

    result.value.output.prompts?.forEach((prompt) => {
      tagsByShortId.set(prompt.id, prompt.tags);
    });
  });

  log.info(`Completed: processed ${itemsToProcess.length} prompts, ${failures} batches failed`);

  // Merge the classification results back into the original documents.
  const docs = itemsToProcess.map((item) => {
    const itemTags = tagsByShortId.get(item.shortId);
    const [lang, otherTags] = partition(itemTags, (tag) => tag.startsWith('lang-'));
    return {
      prompt: item.prompt,
      tags: otherTags,
      type: item.type,
      language: lang.map((tag) => tag.split('lang-')[1]),
      screen: item.screen,
      _id: item._id,
      _source: item._source,
      _index: item._index,
    };
  });

  const bulkBatches = chunk(docs, 100);

  log.info(`Bulk indexing ${docs.length} docs in ${bulkBatches.length} batches`);

  const batchLimiter = pLimit(5);

  await Promise.allSettled(
    bulkBatches.map((batch, index) => {
      const operations = batch.flatMap((doc) => {
        return [
          {
            index: {
              _index: TARGET_INDEX,
              _id: doc._id,
            },
          },
          {
            ...doc._source,
            prompt: doc.prompt,
            screen: doc.screen,
            tags: doc.tags,
            type: doc.type,
            language: doc.language,
          },
        ];
      });

      return batchLimiter(() => {
        // FIX: report progress against the bulk batch count, not the
        // inference batch count.
        log.debug(`bulk: starting batch ${index} out of ${bulkBatches.length}`);
        return kibanaClient.es
          .bulk({
            operations,
            refresh: true,
          })
          .then((result) => {
            if (result.errors) {
              const failed = result.items.filter((item) => Object.values(item)[0].error);
              log.warning(
                `Some documents failed to index: ${failed.length}, example: ${JSON.stringify(
                  failed[0]
                )}`
              );
              return;
            }
          })
          .catch((error) => {
            log.warning(`Failed indexing bulk request: ${error.message}`);
          })
          .finally(() => {
            log.debug(`bulk: settled batch ${index} out of ${bulkBatches.length}`);
          });
      });
    })
  );

  log.info(`Completed bulk indexing`);
});

View file

@ -0,0 +1,30 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { runRecipe } from '../utils/run_recipe';
/**
 * Simple example of how to use these recipes: runs a structured-output
 * task that extracts personal details from a sentence into the given
 * JSON schema.
 */
runRecipe(async ({ inferenceClient, kibanaClient, log, signal }) => {
  // `id` labels the task; `schema` constrains the shape of the response.
  // `as const` preserves literal types so the result is strongly typed.
  const response = await inferenceClient.output({
    id: 'extract_personal_details',
    input: `Sarah is a 29-year-old software developer living in San Francisco.`,
    schema: {
      type: 'object',
      properties: {
        name: { type: 'string' },
        age: { type: 'number' },
        city: { type: 'string' },
      },
      required: ['name'],
    } as const,
  });

  log.info(response);
});

View file

@ -0,0 +1,24 @@
{
"extends": "../../../../../tsconfig.base.json",
"compilerOptions": {
"outDir": "target/types",
"types": [
"jest",
"node"
]
},
"include": [
"**/*.ts",
],
"exclude": [
"target/**/*"
],
"kbn_references": [
"@kbn/inference-common",
"@kbn/observability-utils-common",
"@kbn/kibana-api-cli",
"@kbn/dev-cli-runner",
"@kbn/inference-cli",
"@kbn/tooling-log",
],
}

View file

@ -0,0 +1,47 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { run } from '@kbn/dev-cli-runner';
import { createInferenceClient, InferenceCliClient } from '@kbn/inference-cli';
import { ToolingLog } from '@kbn/tooling-log';
import { createKibanaClient, KibanaClient } from '@kbn/kibana-api-cli';
type RunRecipeCallback = (options: {
inferenceClient: InferenceCliClient;
kibanaClient: KibanaClient;
log: ToolingLog;
signal: AbortSignal;
}) => Promise<void>;
/**
 * Boilerplate runner for GenAI recipes: sets up a CLI context with an
 * abort signal (aborted on cleanup), a Kibana API client and an inference
 * client, then invokes the provided callback with all of them.
 */
export function runRecipe(callback: RunRecipeCallback) {
  run(
    async ({ log, addCleanupTask }) => {
      const abortController = new AbortController();
      const signal = abortController.signal;

      // Abort in-flight work when the CLI tears down.
      addCleanupTask(() => abortController.abort());

      const kibanaClient = await createKibanaClient({ log, signal });
      const inferenceClient = await createInferenceClient({
        log,
        signal,
        kibanaClient,
      });

      return await callback({ inferenceClient, kibanaClient, log, signal });
    },
    {
      flags: {},
    }
  );
}

View file

@ -5789,6 +5789,10 @@
version "0.0.0"
uid ""
"@kbn/inference-cli@link:x-pack/platform/packages/shared/kbn-inference-cli":
version "0.0.0"
uid ""
"@kbn/inference-common@link:x-pack/platform/packages/shared/ai-infra/inference-common":
version "0.0.0"
uid ""
@ -5889,6 +5893,10 @@
version "0.0.0"
uid ""
"@kbn/kbn-genai-cli@link:x-pack/solutions/observability/packages/kbn-genai-cli":
version "0.0.0"
uid ""
"@kbn/kbn-health-gateway-status-plugin@link:src/platform/test/health_gateway/plugins/status":
version "0.0.0"
uid ""
@ -5913,6 +5921,10 @@
version "0.0.0"
uid ""
"@kbn/kibana-api-cli@link:x-pack/platform/packages/shared/kbn-kibana-api-cli":
version "0.0.0"
uid ""
"@kbn/kibana-cors-test-plugin@link:x-pack/test/functional_cors/plugins/kibana_cors_test":
version "0.0.0"
uid ""