[8.x] [Inference] create the @kbn/inference-common package (#193464) (#199002)

# Backport This will backport the following commits from `main` to `8.x`: - [Inference] create the `@kbn/inference-common` package (#193464) (631ccb03)  ### Questions ? Please refer to the [Backport tool documentation](https://github.com/sqren/backport)
2025-04-24 17:59:23 -04:00 · 2024-11-06 00:01:38 +01:00 · 2024-11-06 00:01:38 +01:00 · 0c5f916c34
commit 0c5f916c34
parent 2120e2a748
93 changed files with 1040 additions and 519 deletions
--- a/package.json
+++ b/package.json
@ -571,6 +571,7 @@
    "@kbn/index-management-plugin": "link:x-pack/plugins/index_management",
    "@kbn/index-management-shared-types": "link:x-pack/packages/index-management/index_management_shared_types",
    "@kbn/index-patterns-test-plugin": "link:test/plugin_functional/plugins/index_patterns",
+    "@kbn/inference-common": "link:x-pack/packages/ai-infra/inference-common",
    "@kbn/inference-plugin": "link:x-pack/plugins/inference",
    "@kbn/inference_integration_flyout": "link:x-pack/packages/ml/inference_integration_flyout",
    "@kbn/infra-forge": "link:x-pack/packages/kbn-infra-forge",
--- a/tsconfig.base.json
+++ b/tsconfig.base.json
@ -1044,6 +1044,8 @@
      "@kbn/index-patterns-test-plugin/*": ["test/plugin_functional/plugins/index_patterns/*"],
      "@kbn/inference_integration_flyout": ["x-pack/packages/ml/inference_integration_flyout"],
      "@kbn/inference_integration_flyout/*": ["x-pack/packages/ml/inference_integration_flyout/*"],
+      "@kbn/inference-common": ["x-pack/packages/ai-infra/inference-common"],
+      "@kbn/inference-common/*": ["x-pack/packages/ai-infra/inference-common/*"],
      "@kbn/inference-plugin": ["x-pack/plugins/inference"],
      "@kbn/inference-plugin/*": ["x-pack/plugins/inference/*"],
      "@kbn/infra-forge": ["x-pack/packages/kbn-infra-forge"],
--- a/x-pack/packages/ai-infra/inference-common/README.md
+++ b/x-pack/packages/ai-infra/inference-common/README.md
@ -0,0 +1,7 @@
+# @kbn/inference-common
+
+Common types and utilities for the inference APIs and features.
+
+The main purpose of the package is to have a clean line between the inference plugin's
+implementation and the underlying types, so that other packages or plugins can leverage the
+types without directly depending on the plugin.
--- a/x-pack/packages/ai-infra/inference-common/index.ts
+++ b/x-pack/packages/ai-infra/inference-common/index.ts
@ -0,0 +1,77 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+export {
+  MessageRole,
+  ChatCompletionEventType,
+  ToolChoiceType,
+  type Message,
+  type AssistantMessage,
+  type ToolMessage,
+  type UserMessage,
+  type ToolSchemaType,
+  type FromToolSchema,
+  type ToolSchema,
+  type UnvalidatedToolCall,
+  type ToolCallsOf,
+  type ToolCall,
+  type ToolDefinition,
+  type ToolOptions,
+  type FunctionCallingMode,
+  type ToolChoice,
+  type ChatCompleteAPI,
+  type ChatCompleteOptions,
+  type ChatCompletionResponse,
+  type ChatCompletionTokenCountEvent,
+  type ChatCompletionEvent,
+  type ChatCompletionChunkEvent,
+  type ChatCompletionChunkToolCall,
+  type ChatCompletionMessageEvent,
+  withoutTokenCountEvents,
+  withoutChunkEvents,
+  isChatCompletionMessageEvent,
+  isChatCompletionEvent,
+  isChatCompletionChunkEvent,
+  isChatCompletionTokenCountEvent,
+  ChatCompletionErrorCode,
+  type ChatCompletionToolNotFoundError,
+  type ChatCompletionToolValidationError,
+  type ChatCompletionTokenLimitReachedError,
+  isToolValidationError,
+  isTokenLimitReachedError,
+  isToolNotFoundError,
+} from './src/chat_complete';
+export {
+  OutputEventType,
+  type OutputAPI,
+  type OutputResponse,
+  type OutputCompleteEvent,
+  type OutputUpdateEvent,
+  type Output,
+  type OutputEvent,
+  isOutputCompleteEvent,
+  isOutputUpdateEvent,
+  isOutputEvent,
+  withoutOutputUpdateEvents,
+} from './src/output';
+export {
+  InferenceTaskEventType,
+  type InferenceTaskEvent,
+  type InferenceTaskEventBase,
+} from './src/inference_task';
+export {
+  InferenceTaskError,
+  InferenceTaskErrorCode,
+  type InferenceTaskErrorEvent,
+  type InferenceTaskInternalError,
+  type InferenceTaskRequestError,
+  createInferenceInternalError,
+  createInferenceRequestError,
+  isInferenceError,
+  isInferenceInternalError,
+  isInferenceRequestError,
+} from './src/errors';
--- a/x-pack/plugins/inference/common/output/is_output_update_event.ts
+++ b/x-pack/plugins/inference/common/output/is_output_update_event.ts
@ -5,10 +5,8 @@
 * 2.0.
 */

-import { OutputEvent, OutputEventType, OutputUpdateEvent } from '.';
-
-export function isOutputUpdateEvent<TId extends string>(
-  event: OutputEvent
-): event is OutputUpdateEvent<TId> {
-  return event.type === OutputEventType.OutputComplete;
-}
+module.exports = {
+  preset: '@kbn/test',
+  rootDir: '../../../..',
+  roots: ['<rootDir>/x-pack/packages/ai-infra/inference-common'],
+};
--- a/x-pack/packages/ai-infra/inference-common/kibana.jsonc
+++ b/x-pack/packages/ai-infra/inference-common/kibana.jsonc
@ -0,0 +1,5 @@
+{
+  "type": "shared-common",
+  "id": "@kbn/inference-common",
+  "owner": "@elastic/appex-ai-infra"
+}
--- a/x-pack/packages/ai-infra/inference-common/package.json
+++ b/x-pack/packages/ai-infra/inference-common/package.json
@ -0,0 +1,7 @@
+{
+  "name": "@kbn/inference-common",
+  "private": true,
+  "version": "1.0.0",
+  "license": "Elastic License 2.0",
+  "sideEffects": false
+}
--- a/x-pack/packages/ai-infra/inference-common/src/chat_complete/api.ts
+++ b/x-pack/packages/ai-infra/inference-common/src/chat_complete/api.ts
@ -0,0 +1,69 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import type { Observable } from 'rxjs';
+import type { ToolOptions } from './tools';
+import type { Message } from './messages';
+import type { ChatCompletionEvent } from './events';
+
+/**
+ * Request a completion from the LLM based on a prompt or conversation.
+ *
+ * @example using the API to get an event observable.
+ * ```ts
+ * const events$ = chatComplete({
+ *   connectorId: 'my-connector',
+ *   system: "You are a helpful assistant",
+ *   messages: [
+ *      { role: MessageRole.User, content: "First question?"},
+ *      { role: MessageRole.Assistant, content: "Some answer"},
+ *      { role: MessageRole.User, content: "Another question?"},
+ *   ]
+ * });
+ */
+export type ChatCompleteAPI = <TToolOptions extends ToolOptions = ToolOptions>(
+  options: ChatCompleteOptions<TToolOptions>
+) => ChatCompletionResponse<TToolOptions>;
+
+/**
+ * Options used to call the {@link ChatCompleteAPI}
+ */
+export type ChatCompleteOptions<TToolOptions extends ToolOptions = ToolOptions> = {
+  /**
+   * The ID of the connector to use.
+   * Must be a genAI compatible connector, or an error will be thrown.
+   */
+  connectorId: string;
+  /**
+   * Optional system message for the LLM.
+   */
+  system?: string;
+  /**
+   * The list of messages for the current conversation
+   */
+  messages: Message[];
+  /**
+   * Function calling mode, defaults to "native".
+   */
+  functionCalling?: FunctionCallingMode;
+} & TToolOptions;
+
+/**
+ * Response from the {@link ChatCompleteAPI}.
+ *
+ * Observable of {@link ChatCompletionEvent}
+ */
+export type ChatCompletionResponse<TToolOptions extends ToolOptions = ToolOptions> = Observable<
+  ChatCompletionEvent<TToolOptions>
+>;
+
+/**
+ * Define the function calling mode when using inference APIs.
+ * - native will use the LLM's native function calling (requires the LLM to have native support)
+ * - simulated: will emulate function calling with function calling instructions
+ */
+export type FunctionCallingMode = 'native' | 'simulated';
--- a/x-pack/packages/ai-infra/inference-common/src/chat_complete/errors.ts
+++ b/x-pack/packages/ai-infra/inference-common/src/chat_complete/errors.ts
@ -5,16 +5,22 @@
 * 2.0.
 */

-import { i18n } from '@kbn/i18n';
 import { InferenceTaskError } from '../errors';
 import type { UnvalidatedToolCall } from './tools';

+/**
+ * List of error codes that are specific to the {@link ChatCompleteAPI}
+ */
 export enum ChatCompletionErrorCode {
  TokenLimitReachedError = 'tokenLimitReachedError',
  ToolNotFoundError = 'toolNotFoundError',
  ToolValidationError = 'toolValidationError',
 }

+/**
+ * Error thrown if the completion call fails because of a token limit
+ * error, e.g. when the conversation contains more tokens than the
+ * token limit allows
 export type ChatCompletionTokenLimitReachedError = InferenceTaskError<
  ChatCompletionErrorCode.TokenLimitReachedError,
  {
@ -23,13 +29,24 @@ export type ChatCompletionTokenLimitReachedError = InferenceTaskError<
  }
 >;

+/**
+ * Error thrown if the LLM called a tool that was not provided
+ * in the list of available tools.
+ */
 export type ChatCompletionToolNotFoundError = InferenceTaskError<
  ChatCompletionErrorCode.ToolNotFoundError,
  {
+    /** The name of the tool that got called */
    name: string;
  }
 >;

+/**
+ * Error thrown when the LLM called a tool with parameters that
+ * don't match the tool's schema.
+ *
+ * The level of detail in the error varies depending on the underlying LLM.
+ */
 export type ChatCompletionToolValidationError = InferenceTaskError<
  ChatCompletionErrorCode.ToolValidationError,
  {
@ -40,42 +57,9 @@ export type ChatCompletionToolValidationError = InferenceTaskError<
  }
 >;

-export function createTokenLimitReachedError(
-  tokenLimit?: number,
-  tokenCount?: number
-): ChatCompletionTokenLimitReachedError {
-  return new InferenceTaskError(
-    ChatCompletionErrorCode.TokenLimitReachedError,
-    i18n.translate('xpack.inference.chatCompletionError.tokenLimitReachedError', {
-      defaultMessage: `Token limit reached. Token limit is {tokenLimit}, but the current conversation has {tokenCount} tokens.`,
-      values: { tokenLimit, tokenCount },
-    }),
-    { tokenLimit, tokenCount }
-  );
-}
-
-export function createToolNotFoundError(name: string): ChatCompletionToolNotFoundError {
-  return new InferenceTaskError(
-    ChatCompletionErrorCode.ToolNotFoundError,
-    `Tool ${name} called but was not available`,
-    {
-      name,
-    }
-  );
-}
-
-export function createToolValidationError(
-  message: string,
-  meta: {
-    name?: string;
-    arguments?: string;
-    errorsText?: string;
-    toolCalls?: UnvalidatedToolCall[];
-  }
-): ChatCompletionToolValidationError {
-  return new InferenceTaskError(ChatCompletionErrorCode.ToolValidationError, message, meta);
-}
-
+/**
+ * Check if an error is a {@link ChatCompletionToolValidationError}
+ */
 export function isToolValidationError(error?: Error): error is ChatCompletionToolValidationError {
  return (
    error instanceof InferenceTaskError &&
@ -83,6 +67,9 @@ export function isToolValidationError(error?: Error): error is ChatCompletionToo
  );
 }

+/**
+ * Check if an error is a {@link ChatCompletionTokenLimitReachedError}
+ */
 export function isTokenLimitReachedError(
  error: Error
 ): error is ChatCompletionTokenLimitReachedError {
@ -92,6 +79,9 @@ export function isTokenLimitReachedError(
  );
 }

+/**
+ * Check if an error is a {@link ChatCompletionToolNotFoundError}
+ */
 export function isToolNotFoundError(error: Error): error is ChatCompletionToolNotFoundError {
  return (
    error instanceof InferenceTaskError && error.code === ChatCompletionErrorCode.ToolNotFoundError
--- a/x-pack/packages/ai-infra/inference-common/src/chat_complete/event_utils.ts
+++ b/x-pack/packages/ai-infra/inference-common/src/chat_complete/event_utils.ts
@ -0,0 +1,81 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import { filter, OperatorFunction } from 'rxjs';
+import { InferenceTaskEvent } from '../inference_task';
+import {
+  ChatCompletionEventType,
+  ChatCompletionEvent,
+  ChatCompletionChunkEvent,
+  ChatCompletionMessageEvent,
+  ChatCompletionTokenCountEvent,
+} from './events';
+import type { ToolOptions } from './tools';
+
+/**
+ * Check if the provided {@link ChatCompletionEvent} is a {@link ChatCompletionChunkEvent}
+ */
+export function isChatCompletionChunkEvent(
+  event: ChatCompletionEvent
+): event is ChatCompletionChunkEvent {
+  return event.type === ChatCompletionEventType.ChatCompletionChunk;
+}
+
+/**
+ * Check if the provided {@link ChatCompletionEvent} is a {@link ChatCompletionMessageEvent}
+ */
+export function isChatCompletionMessageEvent<T extends ToolOptions>(
+  event: ChatCompletionEvent<T>
+): event is ChatCompletionMessageEvent<T> {
+  return event.type === ChatCompletionEventType.ChatCompletionMessage;
+}
+
+/**
+ * Check if the provided {@link ChatCompletionEvent} is a {@link ChatCompletionTokenCountEvent}
+ */
+export function isChatCompletionTokenCountEvent(
+  event: ChatCompletionEvent
+): event is ChatCompletionTokenCountEvent {
+  return event.type === ChatCompletionEventType.ChatCompletionTokenCount;
+}
+
+/**
+ * Check if the provided {@link InferenceTaskEvent} is a {@link ChatCompletionEvent}
+ */
+export function isChatCompletionEvent(event: InferenceTaskEvent): event is ChatCompletionEvent {
+  return (
+    event.type === ChatCompletionEventType.ChatCompletionChunk ||
+    event.type === ChatCompletionEventType.ChatCompletionMessage ||
+    event.type === ChatCompletionEventType.ChatCompletionTokenCount
+  );
+}
+
+/**
+ * Operator filtering out the chunk events from the provided observable.
+ */
+export function withoutChunkEvents<T extends ChatCompletionEvent>(): OperatorFunction<
+  T,
+  Exclude<T, ChatCompletionChunkEvent>
+> {
+  return filter(
+    (event): event is Exclude<T, ChatCompletionChunkEvent> =>
+      event.type !== ChatCompletionEventType.ChatCompletionChunk
+  );
+}
+
+/**
+ * Operator filtering out the token count events from the provided observable.
+ */
+export function withoutTokenCountEvents<T extends ChatCompletionEvent>(): OperatorFunction<
+  T,
+  Exclude<T, ChatCompletionTokenCountEvent>
+> {
+  return filter(
+    (event): event is Exclude<T, ChatCompletionTokenCountEvent> =>
+      event.type !== ChatCompletionEventType.ChatCompletionTokenCount
+  );
+}
--- a/x-pack/packages/ai-infra/inference-common/src/chat_complete/events.ts
+++ b/x-pack/packages/ai-infra/inference-common/src/chat_complete/events.ts
@ -0,0 +1,118 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import type { InferenceTaskEventBase } from '../inference_task';
+import type { ToolCallsOf, ToolOptions } from './tools';
+
+/**
+ * List possible values of {@link ChatCompletionEvent} types.
+ */
+export enum ChatCompletionEventType {
+  ChatCompletionChunk = 'chatCompletionChunk',
+  ChatCompletionTokenCount = 'chatCompletionTokenCount',
+  ChatCompletionMessage = 'chatCompletionMessage',
+}
+
+/**
+ * Message event, sent only once, after all the chunks were emitted, and containing
+ * the whole text content and potential tool calls of the response.
+ */
+export type ChatCompletionMessageEvent<TToolOptions extends ToolOptions = ToolOptions> =
+  InferenceTaskEventBase<ChatCompletionEventType.ChatCompletionMessage> & {
+    /**
+     * The text content of the LLM response.
+     */
+    content: string;
+    /**
+     * The eventual tool calls performed by the LLM.
+     */
+    toolCalls: ToolCallsOf<TToolOptions>['toolCalls'];
+  };
+
+/**
+ * Represent a partial tool call present in a chunk event.
+ *
+ * Note that all properties of the structure, except for the index,
+ * are partial and must be aggregated.
+ */
+export interface ChatCompletionChunkToolCall {
+  /**
+   * The tool call index (position in the tool call array).
+   */
+  index: number;
+  /**
+   * chunk of tool call id.
+   */
+  toolCallId: string;
+  function: {
+    /**
+     * chunk of tool name.
+     */
+    name: string;
+    /**
+     * chunk of tool call arguments.
+     */
+    arguments: string;
+  };
+}
+
+/**
+ * Chunk event, containing a fragment of the total content,
+ * and potentially chunks of tool calls.
+ */
+export type ChatCompletionChunkEvent =
+  InferenceTaskEventBase<ChatCompletionEventType.ChatCompletionChunk> & {
+    /**
+     * The content chunk
+     */
+    content: string;
+    /**
+     * The tool call chunks
+     */
+    tool_calls: ChatCompletionChunkToolCall[];
+  };
+
+/**
+ * Token count event, sent only once, usually (but not necessarily)
+ * before the message event
+ */
+export type ChatCompletionTokenCountEvent =
+  InferenceTaskEventBase<ChatCompletionEventType.ChatCompletionTokenCount> & {
+    tokens: {
+      /**
+       * Input token count
+       */
+      prompt: number;
+      /**
+       * Output token count
+       */
+      completion: number;
+      /**
+       * Total token count
+       */
+      total: number;
+    };
+  };
+
+/**
+ * Events emitted from the {@link ChatCompletionResponse} observable
+ * returned from the {@link ChatCompleteAPI}.
+ *
+ * The chatComplete API returns 3 types of events:
+ * - {@link ChatCompletionChunkEvent}: message chunk events
+ * - {@link ChatCompletionTokenCountEvent}: token count event
+ * - {@link ChatCompletionMessageEvent}: message event
+ *
+ * Note that chunk events can be emitted any number of times, but the token count event will be
+ * emitted at most once (it may not be emitted at all, depending on the underlying connector),
+ * and the message event will be emitted only once, after all the chunks were emitted.
+ *
+ */
+export type ChatCompletionEvent<TToolOptions extends ToolOptions = ToolOptions> =
+  | ChatCompletionChunkEvent
+  | ChatCompletionTokenCountEvent
+  | ChatCompletionMessageEvent<TToolOptions>;
--- a/x-pack/packages/ai-infra/inference-common/src/chat_complete/index.ts
+++ b/x-pack/packages/ai-infra/inference-common/src/chat_complete/index.ts
@ -0,0 +1,55 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+export type {
+  ChatCompletionResponse,
+  ChatCompleteAPI,
+  ChatCompleteOptions,
+  FunctionCallingMode,
+} from './api';
+export {
+  ChatCompletionEventType,
+  type ChatCompletionMessageEvent,
+  type ChatCompletionChunkEvent,
+  type ChatCompletionEvent,
+  type ChatCompletionChunkToolCall,
+  type ChatCompletionTokenCountEvent,
+} from './events';
+export {
+  MessageRole,
+  type Message,
+  type AssistantMessage,
+  type UserMessage,
+  type ToolMessage,
+} from './messages';
+export { type ToolSchema, type ToolSchemaType, type FromToolSchema } from './tool_schema';
+export {
+  ToolChoiceType,
+  type ToolOptions,
+  type ToolDefinition,
+  type ToolCall,
+  type ToolCallsOf,
+  type UnvalidatedToolCall,
+  type ToolChoice,
+} from './tools';
+export {
+  isChatCompletionChunkEvent,
+  isChatCompletionEvent,
+  isChatCompletionMessageEvent,
+  isChatCompletionTokenCountEvent,
+  withoutChunkEvents,
+  withoutTokenCountEvents,
+} from './event_utils';
+export {
+  ChatCompletionErrorCode,
+  type ChatCompletionToolNotFoundError,
+  type ChatCompletionToolValidationError,
+  type ChatCompletionTokenLimitReachedError,
+  isToolValidationError,
+  isTokenLimitReachedError,
+  isToolNotFoundError,
+} from './errors';
--- a/x-pack/packages/ai-infra/inference-common/src/chat_complete/messages.ts
+++ b/x-pack/packages/ai-infra/inference-common/src/chat_complete/messages.ts
@ -0,0 +1,75 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import type { ToolCall } from './tools';
+
+/**
+ * Enum for all possible {@link Message} roles.
+ */
+export enum MessageRole {
+  User = 'user',
+  Assistant = 'assistant',
+  Tool = 'tool',
+}
+
+/**
+ * Base type for all subtypes of {@link Message}.
+ */
+interface MessageBase<TRole extends MessageRole> {
+  role: TRole;
+}
+
+/**
+ * Represents a message from the user.
+ */
+export type UserMessage = MessageBase<MessageRole.User> & {
+  /**
+   * The text content of the user message
+   */
+  content: string;
+};
+
+/**
+ * Represents a message from the LLM.
+ */
+export type AssistantMessage = MessageBase<MessageRole.Assistant> & {
+  /**
+   * The text content of the message.
+   * Can be null if the LLM called a tool.
+   */
+  content: string | null;
+  /**
+   * A potential list of {@link ToolCall} the LLM asked to execute.
+   * Note that LLMs with parallel tool invocation can potentially call multiple tools at the same time.
+   */
+  toolCalls?: ToolCall[];
+};
+
+/**
+ * Represents a tool invocation result, following a request from the LLM to execute a tool.
+ */
+export type ToolMessage<TToolResponse extends Record<string, any> | unknown> =
+  MessageBase<MessageRole.Tool> & {
+    /**
+     * The call id matching the {@link ToolCall} this tool message is for.
+     */
+    toolCallId: string;
+    /**
+     * The response from the tool invocation.
+     */
+    response: TToolResponse;
+  };
+
+/**
+ * Mixin composed of all the possible types of messages in a chatComplete discussion.
+ *
+ * Message can be of three types:
+ * - {@link UserMessage}
+ * - {@link AssistantMessage}
+ * - {@link ToolMessage}
+ */
+export type Message = UserMessage | AssistantMessage | ToolMessage<unknown>;
--- a/x-pack/packages/ai-infra/inference-common/src/chat_complete/tool_schema.ts
+++ b/x-pack/packages/ai-infra/inference-common/src/chat_complete/tool_schema.ts
@ -72,8 +72,14 @@ type FromToolSchemaString<TToolSchemaString extends ToolSchemaTypeString> =
    ? ValuesType<TToolSchemaString['enum']>
    : string;

+/**
+ * Defines the schema for a {@link ToolDefinition}
+ */
 export type ToolSchema = ToolSchemaTypeObject;

+/**
+ * Utility type to infer the shape of a tool call from its schema.
+ */
 export type FromToolSchema<TToolSchema extends ToolSchemaType> =
  TToolSchema extends ToolSchemaTypeObject
    ? FromToolSchemaObject<TToolSchema>
--- a/x-pack/packages/ai-infra/inference-common/src/chat_complete/tools.ts
+++ b/x-pack/packages/ai-infra/inference-common/src/chat_complete/tools.ts
@ -4,15 +4,12 @@
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */
+
 import type { ValuesType } from 'utility-types';
 import { FromToolSchema, ToolSchema } from './tool_schema';

 type Assert<TValue, TType> = TValue extends TType ? TValue & TType : never;

-interface CustomToolChoice<TName extends string = string> {
-  function: TName;
-}
-
 type ToolsOfChoice<TToolOptions extends ToolOptions> = TToolOptions['toolChoice'] extends {
  function: infer TToolName;
 }
@ -21,6 +18,9 @@ type ToolsOfChoice<TToolOptions extends ToolOptions> = TToolOptions['toolChoice'
    : TToolOptions['tools']
  : TToolOptions['tools'];

+/**
+ * Utility type to infer the tool calls response shape.
+ */
 type ToolResponsesOf<TTools extends Record<string, ToolDefinition> | undefined> =
  TTools extends Record<string, ToolDefinition>
    ? Array<
@ -30,18 +30,64 @@ type ToolResponsesOf<TTools extends Record<string, ToolDefinition> | undefined>
      >
    : never[];

+/**
+ * Utility type to infer the tool call response shape.
+ */
 type ToolResponseOf<TName extends string, TToolDefinition extends ToolDefinition> = ToolCall<
  TName,
  TToolDefinition extends { schema: ToolSchema } ? FromToolSchema<TToolDefinition['schema']> : {}
 >;

+/**
+ * Tool invocation choice type.
+ *
+ * Refer to {@link ToolChoice} for more details.
+ */
+export enum ToolChoiceType {
+  none = 'none',
+  auto = 'auto',
+  required = 'required',
+}
+
+/**
+ * Represent a tool choice where the LLM is forced to call a specific tool.
+ *
+ * Refer to {@link ToolChoice} for more details.
+ */
+interface CustomToolChoice<TName extends string = string> {
+  function: TName;
+}
+
+/**
+ * Defines the tool invocation for {@link ToolOptions}, either a {@link ToolChoiceType} or {@link CustomToolChoice}.
+ * - {@link ToolChoiceType.none}: the LLM will never call a tool
+ * - {@link ToolChoiceType.auto}: the LLM will decide if it should call a tool or provide a text response
+ * - {@link ToolChoiceType.required}: the LLM will always call a tool, but will decide which one to call
+ * - {@link CustomToolChoice}: the LLM will always call the specified tool
+ */
 export type ToolChoice<TName extends string = string> = ToolChoiceType | CustomToolChoice<TName>;

+/**
+ * The definition of a tool that will be provided to the LLM for it to eventually call.
+ */
 export interface ToolDefinition {
+  /**
+   * A description of what the tool does. Note that this will be exposed to the LLM,
+   * so the description should be explicit about what the tool does and when to call it.
+   */
  description: string;
+  /**
+   * The input schema for the tool, representing the shape of the tool's parameters
+   *
+   * Even if optional, it is highly recommended to define a schema for all tool definitions, unless
+   * the tool is supposed to be called without parameters.
+   */
  schema?: ToolSchema;
 }

+/**
+ * Utility type to infer the toolCall type of {@link ChatCompletionMessageEvent}.
+ */
 export type ToolCallsOf<TToolOptions extends ToolOptions> = TToolOptions extends {
  tools?: Record<string, ToolDefinition>;
 }
@ -52,12 +98,11 @@ export type ToolCallsOf<TToolOptions extends ToolOptions> = TToolOptions extends
      }
  : { toolCalls: never };

-export enum ToolChoiceType {
-  none = 'none',
-  auto = 'auto',
-  required = 'required',
-}
-
+/**
+ * Represents a tool call from the LLM before correctly converted to the schema type.
+ *
+ * Only publicly exposed because referenced by {@link ChatCompletionToolValidationError}
+ */
 export interface UnvalidatedToolCall {
  toolCallId: string;
  function: {
@ -66,17 +111,39 @@ export interface UnvalidatedToolCall {
  };
 }

+/**
+ * Represents a tool call performed by the LLM.
+ */
 export interface ToolCall<
  TName extends string = string,
  TArguments extends Record<string, any> | undefined = Record<string, any> | undefined
 > {
+  /**
+   * The id of the tool call, that must be re-used when providing the tool call response
+   */
  toolCallId: string;
  function: {
+    /**
+     * The name of the tool that was called
+     */
    name: TName;
  } & (TArguments extends Record<string, any> ? { arguments: TArguments } : {});
 }

+/**
+ * Tool-related parameters of {@link ChatCompleteAPI}
+ */
 export interface ToolOptions<TToolNames extends string = string> {
+  /**
+   * The choice of tool execution.
+   *
+   * Refer to {@link ToolChoice}
+   */
  toolChoice?: ToolChoice<TToolNames>;
+  /**
+   * The list of tool definitions that will be exposed to the LLM.
+   *
+   * Refer to {@link ToolDefinition}.
+   */
  tools?: Record<TToolNames, ToolDefinition>;
 }
--- a/x-pack/packages/ai-infra/inference-common/src/errors.ts
+++ b/x-pack/packages/ai-infra/inference-common/src/errors.ts
@ -4,14 +4,20 @@
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */
-import { i18n } from '@kbn/i18n';
+
 import { InferenceTaskEventBase, InferenceTaskEventType } from './inference_task';

+/**
+ * Enum for generic inference error codes.
+ */
 export enum InferenceTaskErrorCode {
  internalError = 'internalError',
  requestError = 'requestError',
 }

+/**
+ * Base class for all inference API errors.
+ */
 export class InferenceTaskError<
  TCode extends string,
  TMeta extends Record<string, any> | undefined
@ -51,9 +57,7 @@ export type InferenceTaskRequestError = InferenceTaskError<
 >;

 export function createInferenceInternalError(
-  message: string = i18n.translate('xpack.inference.internalError', {
-    defaultMessage: 'An internal error occurred',
-  }),
+  message = 'An internal error occurred',
  meta?: Record<string, any>
 ): InferenceTaskInternalError {
  return new InferenceTaskError(InferenceTaskErrorCode.internalError, message, meta ?? {});
--- a/x-pack/packages/ai-infra/inference-common/src/inference_task.ts
+++ b/x-pack/packages/ai-infra/inference-common/src/inference_task.ts
@ -5,7 +5,13 @@
 * 2.0.
 */

+/**
+ * Base interface for all inference events.
+ */
 export interface InferenceTaskEventBase<TEventType extends string> {
+  /**
+   * Unique identifier of the event type.
+   */
  type: TEventType;
 }

--- a/x-pack/packages/ai-infra/inference-common/src/output/api.ts
+++ b/x-pack/packages/ai-infra/inference-common/src/output/api.ts
@ -0,0 +1,46 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import type { Observable } from 'rxjs';
+import type { Message, FunctionCallingMode, FromToolSchema, ToolSchema } from '../chat_complete';
+import type { OutputEvent } from './events';
+
+/**
+ * Generate a response with the LLM for a prompt, optionally based on a schema.
+ *
+ * @param {string} id The id of the operation
+ * @param {string} options.connectorId The ID of the connector that is to be used.
+ * @param {string} options.input The prompt for the LLM.
+ * @param {Message[]} [options.previousMessages] Previous messages in a conversation.
+ * @param {ToolSchema} [options.schema] The schema the response from the LLM should adhere to.
+ */
+export type OutputAPI = <
+  TId extends string = string,
+  TOutputSchema extends ToolSchema | undefined = ToolSchema | undefined
+>(
+  id: TId,
+  options: {
+    connectorId: string;
+    system?: string;
+    input: string;
+    schema?: TOutputSchema;
+    previousMessages?: Message[];
+    functionCalling?: FunctionCallingMode;
+  }
+) => OutputResponse<TId, TOutputSchema>;
+
+/**
+ * Response from the {@link OutputAPI}.
+ *
+ * Observable of {@link OutputEvent}
+ */
+export type OutputResponse<
+  TId extends string = string,
+  TOutputSchema extends ToolSchema | undefined = ToolSchema | undefined
+> = Observable<
+  OutputEvent<TId, TOutputSchema extends ToolSchema ? FromToolSchema<TOutputSchema> : undefined>
+>;
--- a/x-pack/packages/ai-infra/inference-common/src/output/event_utils.ts
+++ b/x-pack/packages/ai-infra/inference-common/src/output/event_utils.ts
@ -0,0 +1,49 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import { filter, OperatorFunction } from 'rxjs';
+import { OutputCompleteEvent, OutputEvent, OutputEventType, OutputUpdateEvent } from '.';
+import type { InferenceTaskEvent } from '../inference_task';
+
+/**
+ * Check if the provided {@link OutputEvent} is a {@link OutputCompleteEvent}
+ */
+export function isOutputCompleteEvent<TOutputEvent extends OutputEvent>(
+  event: TOutputEvent
+): event is Extract<TOutputEvent, OutputCompleteEvent> {
+  return event.type === OutputEventType.OutputComplete;
+}
+
+/**
+ * Check if the provided {@link InferenceTaskEvent} is a {@link OutputEvent} (update or complete)
+ */
+export function isOutputEvent(event: InferenceTaskEvent): event is OutputEvent {
+  return (
+    event.type === OutputEventType.OutputComplete || event.type === OutputEventType.OutputUpdate
+  );
+}
+
+/**
+ * Check if the provided {@link OutputEvent} is a {@link OutputUpdateEvent} (a streamed chunk)
+ */
+export function isOutputUpdateEvent<TId extends string>(
+  event: OutputEvent
+): event is OutputUpdateEvent<TId> {
+  return event.type === OutputEventType.OutputUpdate;
+}
+
+/**
+ * Operator dropping {@link OutputUpdateEvent}s from the source observable and narrowing its type.
+ */
+export function withoutOutputUpdateEvents<T extends OutputEvent>(): OperatorFunction<
+  T,
+  Exclude<T, OutputUpdateEvent>
+> {
+  return filter(
+    (event): event is Exclude<T, OutputUpdateEvent> => event.type !== OutputEventType.OutputUpdate
+  );
+}
--- a/x-pack/packages/ai-infra/inference-common/src/output/events.ts
+++ b/x-pack/packages/ai-infra/inference-common/src/output/events.ts
@ -0,0 +1,65 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import { InferenceTaskEventBase } from '../inference_task';
+
+/**
+ * List possible values of {@link OutputEvent} types.
+ */
+export enum OutputEventType {
+  OutputUpdate = 'output',
+  OutputComplete = 'complete',
+}
+
+/**
+ * Task output of a {@link OutputCompleteEvent}; the union contains `unknown`, so any value fits.
+ */
+export type Output = Record<string, any> | undefined | unknown;
+
+/**
+ * Update (chunk) event for the {@link OutputAPI}
+ */
+export type OutputUpdateEvent<TId extends string = string> =
+  InferenceTaskEventBase<OutputEventType.OutputUpdate> & {
+    /**
+     * The id of the operation, as provided as input
+     */
+    id: TId;
+    /**
+     * The text content of the chunk
+     */
+    content: string;
+  };
+
+/**
+ * Completion (complete message) event for the {@link OutputAPI}
+ */
+export type OutputCompleteEvent<
+  TId extends string = string,
+  TOutput extends Output = Output
+> = InferenceTaskEventBase<OutputEventType.OutputComplete> & {
+  /**
+   * The id of the operation, as provided as input
+   */
+  id: TId;
+  /**
+   * The task output, following the schema specified as input
+   */
+  output: TOutput;
+  /**
+   * Potential text content provided by the LLM,
+   * if it was provided in addition to the tool call
+   */
+  content: string;
+};
+
+/**
+ * Events emitted from the {@link OutputAPI}.
+ */
+export type OutputEvent<TId extends string = string, TOutput extends Output = Output> =
+  | OutputUpdateEvent<TId>
+  | OutputCompleteEvent<TId, TOutput>;
--- a/x-pack/packages/ai-infra/inference-common/src/output/index.ts
+++ b/x-pack/packages/ai-infra/inference-common/src/output/index.ts
@ -0,0 +1,21 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+export type { OutputAPI, OutputResponse } from './api';
+export {
+  OutputEventType,
+  type OutputCompleteEvent,
+  type OutputUpdateEvent,
+  type Output,
+  type OutputEvent,
+} from './events';
+export {
+  isOutputCompleteEvent,
+  isOutputUpdateEvent,
+  isOutputEvent,
+  withoutOutputUpdateEvents,
+} from './event_utils';
--- a/x-pack/packages/ai-infra/inference-common/tsconfig.json
+++ b/x-pack/packages/ai-infra/inference-common/tsconfig.json
@ -0,0 +1,20 @@
+{
+  "extends": "../../../../tsconfig.base.json",
+  "compilerOptions": {
+    "outDir": "target/types",
+    "types": [
+      "jest",
+      "node",
+      "react"
+    ]
+  },
+  "include": [
+    "**/*.ts",
+    "**/*.tsx",
+  ],
+  "exclude": [
+    "target/**/*"
+  ],
+  "kbn_references": [
+  ]
+}
--- a/x-pack/plugins/inference/common/chat_complete/index.ts
+++ b/x-pack/plugins/inference/common/chat_complete/index.ts
@ -1,99 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License
- * 2.0; you may not use this file except in compliance with the Elastic License
- * 2.0.
- */
-
-import type { Observable } from 'rxjs';
-import type { InferenceTaskEventBase } from '../inference_task';
-import type { ToolCall, ToolCallsOf, ToolOptions } from './tools';
-
-export enum MessageRole {
-  User = 'user',
-  Assistant = 'assistant',
-  Tool = 'tool',
-}
-
-interface MessageBase<TRole extends MessageRole> {
-  role: TRole;
-}
-
-export type UserMessage = MessageBase<MessageRole.User> & { content: string };
-
-export type AssistantMessage = MessageBase<MessageRole.Assistant> & {
-  content: string | null;
-  toolCalls?: Array<ToolCall<string, Record<string, any> | undefined>>;
-};
-
-export type ToolMessage<TToolResponse extends Record<string, any> | unknown> =
-  MessageBase<MessageRole.Tool> & {
-    toolCallId: string;
-    response: TToolResponse;
-  };
-
-export type Message = UserMessage | AssistantMessage | ToolMessage<unknown>;
-
-export type ChatCompletionMessageEvent<TToolOptions extends ToolOptions = ToolOptions> =
-  InferenceTaskEventBase<ChatCompletionEventType.ChatCompletionMessage> & {
-    content: string;
-  } & { toolCalls: ToolCallsOf<TToolOptions>['toolCalls'] };
-
-export type ChatCompletionResponse<TToolOptions extends ToolOptions = ToolOptions> = Observable<
-  ChatCompletionEvent<TToolOptions>
->;
-
-export enum ChatCompletionEventType {
-  ChatCompletionChunk = 'chatCompletionChunk',
-  ChatCompletionTokenCount = 'chatCompletionTokenCount',
-  ChatCompletionMessage = 'chatCompletionMessage',
-}
-
-export interface ChatCompletionChunkToolCall {
-  index: number;
-  toolCallId: string;
-  function: {
-    name: string;
-    arguments: string;
-  };
-}
-
-export type ChatCompletionChunkEvent =
-  InferenceTaskEventBase<ChatCompletionEventType.ChatCompletionChunk> & {
-    content: string;
-    tool_calls: ChatCompletionChunkToolCall[];
-  };
-
-export type ChatCompletionTokenCountEvent =
-  InferenceTaskEventBase<ChatCompletionEventType.ChatCompletionTokenCount> & {
-    tokens: {
-      prompt: number;
-      completion: number;
-      total: number;
-    };
-  };
-
-export type ChatCompletionEvent<TToolOptions extends ToolOptions = ToolOptions> =
-  | ChatCompletionChunkEvent
-  | ChatCompletionTokenCountEvent
-  | ChatCompletionMessageEvent<TToolOptions>;
-
-export type FunctionCallingMode = 'native' | 'simulated';
-
-/**
- * Request a completion from the LLM based on a prompt or conversation.
- *
- * @param {string} options.connectorId The ID of the connector to use
- * @param {string} [options.system] A system message that defines the behavior of the LLM.
- * @param {Message[]} options.message A list of messages that make up the conversation to be completed.
- * @param {ToolChoice} [options.toolChoice] Force the LLM to call a (specific) tool, or no tool
- * @param {Record<string, ToolDefinition>} [options.tools] A map of tools that can be called by the LLM
- */
-export type ChatCompleteAPI = <TToolOptions extends ToolOptions = ToolOptions>(
-  options: {
-    connectorId: string;
-    system?: string;
-    messages: Message[];
-    functionCalling?: FunctionCallingMode;
-  } & TToolOptions
-) => ChatCompletionResponse<TToolOptions>;
--- a/x-pack/plugins/inference/common/chat_complete/is_chat_completion_chunk_event.ts
+++ b/x-pack/plugins/inference/common/chat_complete/is_chat_completion_chunk_event.ts
@ -1,14 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License
- * 2.0; you may not use this file except in compliance with the Elastic License
- * 2.0.
- */
-
-import { ChatCompletionChunkEvent, ChatCompletionEvent, ChatCompletionEventType } from '.';
-
-export function isChatCompletionChunkEvent(
-  event: ChatCompletionEvent
-): event is ChatCompletionChunkEvent {
-  return event.type === ChatCompletionEventType.ChatCompletionChunk;
-}
--- a/x-pack/plugins/inference/common/chat_complete/is_chat_completion_event.ts
+++ b/x-pack/plugins/inference/common/chat_complete/is_chat_completion_event.ts
@ -1,17 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License
- * 2.0; you may not use this file except in compliance with the Elastic License
- * 2.0.
- */
-
-import { ChatCompletionEvent, ChatCompletionEventType } from '.';
-import { InferenceTaskEvent } from '../inference_task';
-
-export function isChatCompletionEvent(event: InferenceTaskEvent): event is ChatCompletionEvent {
-  return (
-    event.type === ChatCompletionEventType.ChatCompletionChunk ||
-    event.type === ChatCompletionEventType.ChatCompletionMessage ||
-    event.type === ChatCompletionEventType.ChatCompletionTokenCount
-  );
-}
--- a/x-pack/plugins/inference/common/chat_complete/is_chat_completion_message_event.ts
+++ b/x-pack/plugins/inference/common/chat_complete/is_chat_completion_message_event.ts
@ -1,15 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License
- * 2.0; you may not use this file except in compliance with the Elastic License
- * 2.0.
- */
-
-import { ChatCompletionEvent, ChatCompletionEventType, ChatCompletionMessageEvent } from '.';
-import type { ToolOptions } from './tools';
-
-export function isChatCompletionMessageEvent<T extends ToolOptions<string>>(
-  event: ChatCompletionEvent<T>
-): event is ChatCompletionMessageEvent<T> {
-  return event.type === ChatCompletionEventType.ChatCompletionMessage;
-}
--- a/x-pack/plugins/inference/common/chat_complete/without_chunk_events.ts
+++ b/x-pack/plugins/inference/common/chat_complete/without_chunk_events.ts
@ -1,19 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License
- * 2.0; you may not use this file except in compliance with the Elastic License
- * 2.0.
- */
-
-import { filter, OperatorFunction } from 'rxjs';
-import { ChatCompletionChunkEvent, ChatCompletionEvent, ChatCompletionEventType } from '.';
-
-export function withoutChunkEvents<T extends ChatCompletionEvent>(): OperatorFunction<
-  T,
-  Exclude<T, ChatCompletionChunkEvent>
-> {
-  return filter(
-    (event): event is Exclude<T, ChatCompletionChunkEvent> =>
-      event.type !== ChatCompletionEventType.ChatCompletionChunk
-  );
-}
--- a/x-pack/plugins/inference/common/chat_complete/without_token_count_events.ts
+++ b/x-pack/plugins/inference/common/chat_complete/without_token_count_events.ts
@ -1,19 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License
- * 2.0; you may not use this file except in compliance with the Elastic License
- * 2.0.
- */
-
-import { filter, OperatorFunction } from 'rxjs';
-import { ChatCompletionEvent, ChatCompletionEventType, ChatCompletionTokenCountEvent } from '.';
-
-export function withoutTokenCountEvents<T extends ChatCompletionEvent>(): OperatorFunction<
-  T,
-  Exclude<T, ChatCompletionTokenCountEvent>
-> {
-  return filter(
-    (event): event is Exclude<T, ChatCompletionTokenCountEvent> =>
-      event.type !== ChatCompletionEventType.ChatCompletionTokenCount
-  );
-}
--- a/x-pack/plugins/inference/common/connectors.ts
+++ b/x-pack/plugins/inference/common/connectors.ts
@ -22,7 +22,3 @@ export interface InferenceConnector {
 export function isSupportedConnectorType(id: string): id is InferenceConnectorType {
  return allSupportedConnectorTypes.includes(id as InferenceConnectorType);
 }
-
-export interface GetConnectorsResponseBody {
-  connectors: InferenceConnector[];
-}
--- a/x-pack/plugins/inference/common/output/create_output_api.ts
+++ b/x-pack/plugins/inference/common/output/create_output_api.ts
@ -6,10 +6,16 @@
 */

 import { map } from 'rxjs';
-import { ChatCompleteAPI, ChatCompletionEventType, MessageRole } from '../chat_complete';
-import { withoutTokenCountEvents } from '../chat_complete/without_token_count_events';
-import { OutputAPI, OutputEvent, OutputEventType } from '.';
-import { ensureMultiTurn } from '../ensure_multi_turn';
+import {
+  OutputAPI,
+  OutputEvent,
+  OutputEventType,
+  ChatCompleteAPI,
+  ChatCompletionEventType,
+  MessageRole,
+  withoutTokenCountEvents,
+} from '@kbn/inference-common';
+import { ensureMultiTurn } from './utils/ensure_multi_turn';

 export function createOutputApi(chatCompleteApi: ChatCompleteAPI): OutputAPI {
  return (id, { connectorId, input, schema, system, previousMessages, functionCalling }) => {
--- a/x-pack/plugins/inference/common/chat_complete/request.ts
+++ b/x-pack/plugins/inference/common/chat_complete/request.ts
@ -5,8 +5,8 @@
 * 2.0.
 */

-import type { Message, FunctionCallingMode } from '.';
-import type { ToolOptions } from './tools';
+import type { FunctionCallingMode, Message, ToolOptions } from '@kbn/inference-common';
+import { InferenceConnector } from './connectors';

 export type ChatCompleteRequestBody = {
  connectorId: string;
@ -15,3 +15,7 @@ export type ChatCompleteRequestBody = {
  messages: Message[];
  functionCalling?: FunctionCallingMode;
 } & ToolOptions;
+
+export interface GetConnectorsResponseBody {
+  connectors: InferenceConnector[];
+}
--- a/x-pack/plugins/inference/common/index.ts
+++ b/x-pack/plugins/inference/common/index.ts
@ -10,22 +10,8 @@ export {
  splitIntoCommands,
 } from './tasks/nl_to_esql/correct_common_esql_mistakes';

-export { isChatCompletionChunkEvent } from './chat_complete/is_chat_completion_chunk_event';
-export { isChatCompletionMessageEvent } from './chat_complete/is_chat_completion_message_event';
-export { isChatCompletionEvent } from './chat_complete/is_chat_completion_event';
+export { generateFakeToolCallId } from './utils/generate_fake_tool_call_id';

-export { isOutputUpdateEvent } from './output/is_output_update_event';
-export { isOutputCompleteEvent } from './output/is_output_complete_event';
-export { isOutputEvent } from './output/is_output_event';
+export { createOutputApi } from './create_output_api';

-export type { ToolSchema } from './chat_complete/tool_schema';
-
-export {
-  type Message,
-  MessageRole,
-  type ToolMessage,
-  type AssistantMessage,
-  type UserMessage,
-} from './chat_complete';
-
-export { generateFakeToolCallId } from './chat_complete/generate_fake_tool_call_id';
+export type { ChatCompleteRequestBody, GetConnectorsResponseBody } from './http_apis';
--- a/x-pack/plugins/inference/common/output/index.ts
+++ b/x-pack/plugins/inference/common/output/index.ts
@ -1,81 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License
- * 2.0; you may not use this file except in compliance with the Elastic License
- * 2.0.
- */
-
-import { Observable } from 'rxjs';
-import { ServerSentEventBase } from '@kbn/sse-utils';
-import { FromToolSchema, ToolSchema } from '../chat_complete/tool_schema';
-import type { Message, FunctionCallingMode } from '../chat_complete';
-
-export enum OutputEventType {
-  OutputUpdate = 'output',
-  OutputComplete = 'complete',
-}
-
-type Output = Record<string, any> | undefined | unknown;
-
-export type OutputUpdateEvent<TId extends string = string> = ServerSentEventBase<
-  OutputEventType.OutputUpdate,
-  {
-    id: TId;
-    content: string;
-  }
->;
-
-export type OutputCompleteEvent<
-  TId extends string = string,
-  TOutput extends Output = Output
-> = ServerSentEventBase<
-  OutputEventType.OutputComplete,
-  {
-    id: TId;
-    output: TOutput;
-    content: string;
-  }
->;
-
-export type OutputEvent<TId extends string = string, TOutput extends Output = Output> =
-  | OutputUpdateEvent<TId>
-  | OutputCompleteEvent<TId, TOutput>;
-
-/**
- * Generate a response with the LLM for a prompt, optionally based on a schema.
- *
- * @param {string} id The id of the operation
- * @param {string} options.connectorId The ID of the connector that is to be used.
- * @param {string} options.input The prompt for the LLM.
- * @param {string} options.messages Previous messages in a conversation.
- * @param {ToolSchema} [options.schema] The schema the response from the LLM should adhere to.
- */
-export type OutputAPI = <
-  TId extends string = string,
-  TOutputSchema extends ToolSchema | undefined = ToolSchema | undefined
->(
-  id: TId,
-  options: {
-    connectorId: string;
-    system?: string;
-    input: string;
-    schema?: TOutputSchema;
-    previousMessages?: Message[];
-    functionCalling?: FunctionCallingMode;
-  }
-) => Observable<
-  OutputEvent<TId, TOutputSchema extends ToolSchema ? FromToolSchema<TOutputSchema> : undefined>
->;
-
-export function createOutputCompleteEvent<TId extends string, TOutput extends Output>(
-  id: TId,
-  output: TOutput,
-  content?: string
-): OutputCompleteEvent<TId, TOutput> {
-  return {
-    type: OutputEventType.OutputComplete,
-    id,
-    output,
-    content: content ?? '',
-  };
-}
--- a/x-pack/plugins/inference/common/output/is_output_complete_event.ts
+++ b/x-pack/plugins/inference/common/output/is_output_complete_event.ts
@ -1,14 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License
- * 2.0; you may not use this file except in compliance with the Elastic License
- * 2.0.
- */
-
-import { OutputEvent, OutputEventType, OutputUpdateEvent } from '.';
-
-export function isOutputCompleteEvent<TOutputEvent extends OutputEvent>(
-  event: TOutputEvent
-): event is Exclude<TOutputEvent, OutputUpdateEvent> {
-  return event.type === OutputEventType.OutputComplete;
-}
--- a/x-pack/plugins/inference/common/output/is_output_event.ts
+++ b/x-pack/plugins/inference/common/output/is_output_event.ts
@ -1,15 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License
- * 2.0; you may not use this file except in compliance with the Elastic License
- * 2.0.
- */
-
-import { OutputEvent, OutputEventType } from '.';
-import type { InferenceTaskEvent } from '../inference_task';
-
-export function isOutputEvent(event: InferenceTaskEvent): event is OutputEvent {
-  return (
-    event.type === OutputEventType.OutputComplete || event.type === OutputEventType.OutputUpdate
-  );
-}
--- a/x-pack/plugins/inference/common/output/without_output_update_events.ts
+++ b/x-pack/plugins/inference/common/output/without_output_update_events.ts
@ -1,18 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License
- * 2.0; you may not use this file except in compliance with the Elastic License
- * 2.0.
- */
-
-import { filter, OperatorFunction } from 'rxjs';
-import { OutputEvent, OutputEventType, OutputUpdateEvent } from '.';
-
-export function withoutOutputUpdateEvents<T extends OutputEvent>(): OperatorFunction<
-  T,
-  Exclude<T, OutputUpdateEvent>
-> {
-  return filter(
-    (event): event is Exclude<T, OutputUpdateEvent> => event.type !== OutputEventType.OutputUpdate
-  );
-}
--- a/x-pack/plugins/inference/common/tasks/nl_to_esql/correct_query_with_actions.ts
+++ b/x-pack/plugins/inference/common/tasks/nl_to_esql/correct_query_with_actions.ts
@ -4,6 +4,7 @@
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */
+
 import { validateQuery, getActions } from '@kbn/esql-validation-autocomplete';
 import { getAstAndSyntaxErrors } from '@kbn/esql-ast';

--- a/x-pack/plugins/inference/common/utils/ensure_multi_turn.ts
+++ b/x-pack/plugins/inference/common/utils/ensure_multi_turn.ts
@ -5,7 +5,7 @@
 * 2.0.
 */

-import { Message, MessageRole } from './chat_complete';
+import { Message, MessageRole } from '@kbn/inference-common';

 function isUserMessage(message: Message): boolean {
  return message.role !== MessageRole.Assistant;
--- a/x-pack/plugins/inference/common/chat_complete/generate_fake_tool_call_id.ts
+++ b/x-pack/plugins/inference/common/chat_complete/generate_fake_tool_call_id.ts
--- a/x-pack/plugins/inference/common/utils/truncate_list.ts
+++ b/x-pack/plugins/inference/common/utils/truncate_list.ts
--- a/x-pack/plugins/inference/public/chat_complete/index.ts
+++ b/x-pack/plugins/inference/public/chat_complete/index.ts
@ -7,9 +7,9 @@

 import { from } from 'rxjs';
 import type { HttpStart } from '@kbn/core/public';
-import type { ChatCompleteAPI } from '../../common/chat_complete';
-import type { ChatCompleteRequestBody } from '../../common/chat_complete/request';
-import { httpResponseIntoObservable } from '../util/http_response_into_observable';
+import type { ChatCompleteAPI } from '@kbn/inference-common';
+import type { ChatCompleteRequestBody } from '../common/http_apis';
+import { httpResponseIntoObservable } from './util/http_response_into_observable';

 export function createChatCompleteApi({ http }: { http: HttpStart }): ChatCompleteAPI {
  return ({ connectorId, messages, system, toolChoice, tools, functionCalling }) => {
--- a/x-pack/plugins/inference/public/index.ts
+++ b/x-pack/plugins/inference/public/index.ts
@ -4,9 +4,8 @@
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */
-import type { PluginInitializer, PluginInitializerContext } from '@kbn/core/public';

-import { InferencePlugin } from './plugin';
+import type { PluginInitializer, PluginInitializerContext } from '@kbn/core/public';
 import type {
  InferencePublicSetup,
  InferencePublicStart,
@ -14,6 +13,7 @@ import type {
  InferenceStartDependencies,
  ConfigSchema,
 } from './types';
+import { InferencePlugin } from './plugin';

 export { httpResponseIntoObservable } from './util/http_response_into_observable';

--- a/x-pack/plugins/inference/public/plugin.tsx
+++ b/x-pack/plugins/inference/public/plugin.tsx
@ -7,8 +7,8 @@

 import type { CoreSetup, CoreStart, Plugin, PluginInitializerContext } from '@kbn/core/public';
 import type { Logger } from '@kbn/logging';
-import { createOutputApi } from '../common/output/create_output_api';
-import type { GetConnectorsResponseBody } from '../common/connectors';
+import { createOutputApi } from '../common/create_output_api';
+import type { GetConnectorsResponseBody } from '../common/http_apis';
 import { createChatCompleteApi } from './chat_complete';
 import type {
  ConfigSchema,
@ -41,10 +41,11 @@ export class InferencePlugin

  start(coreStart: CoreStart, pluginsStart: InferenceStartDependencies): InferencePublicStart {
    const chatComplete = createChatCompleteApi({ http: coreStart.http });
+    const output = createOutputApi(chatComplete);

    return {
      chatComplete,
-      output: createOutputApi(chatComplete),
+      output,
      getConnectors: async () => {
        const res = await coreStart.http.get<GetConnectorsResponseBody>(
          '/internal/inference/connectors'
--- a/x-pack/plugins/inference/public/types.ts
+++ b/x-pack/plugins/inference/public/types.ts
@ -4,9 +4,9 @@
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */
-import type { ChatCompleteAPI } from '../common/chat_complete';
+
+import type { ChatCompleteAPI, OutputAPI } from '@kbn/inference-common';
 import type { InferenceConnector } from '../common/connectors';
-import type { OutputAPI } from '../common/output';

 /* eslint-disable @typescript-eslint/no-empty-interface*/

--- a/x-pack/plugins/inference/public/util/create_observable_from_http_response.ts
+++ b/x-pack/plugins/inference/public/util/create_observable_from_http_response.ts
@ -4,9 +4,10 @@
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */
+
 import { createParser } from 'eventsource-parser';
 import { Observable, throwError } from 'rxjs';
-import { createInferenceInternalError } from '../../common/errors';
+import { createInferenceInternalError } from '@kbn/inference-common';

 export interface StreamedHttpResponse {
  response?: { body: ReadableStream<Uint8Array> | null | undefined };
--- a/x-pack/plugins/inference/public/util/http_response_into_observable.test.ts
+++ b/x-pack/plugins/inference/public/util/http_response_into_observable.test.ts
@ -7,10 +7,12 @@

 import { lastValueFrom, of, toArray } from 'rxjs';
 import { httpResponseIntoObservable } from './http_response_into_observable';
+import {
+  ChatCompletionEventType,
+  InferenceTaskEventType,
+  InferenceTaskErrorCode,
+} from '@kbn/inference-common';
 import type { StreamedHttpResponse } from './create_observable_from_http_response';
-import { ChatCompletionEventType } from '../../common/chat_complete';
-import { InferenceTaskEventType } from '../../common/inference_task';
-import { InferenceTaskErrorCode } from '../../common/errors';

 function toSse(...events: Array<Record<string, any>>) {
  return events.map((event) => new TextEncoder().encode(`data: ${JSON.stringify(event)}\n\n`));
--- a/x-pack/plugins/inference/public/util/http_response_into_observable.ts
+++ b/x-pack/plugins/inference/public/util/http_response_into_observable.ts
@ -10,8 +10,9 @@ import {
  createInferenceInternalError,
  InferenceTaskError,
  InferenceTaskErrorEvent,
-} from '../../common/errors';
-import { InferenceTaskEvent, InferenceTaskEventType } from '../../common/inference_task';
+  InferenceTaskEvent,
+  InferenceTaskEventType,
+} from '@kbn/inference-common';
 import {
  createObservableFromHttpResponse,
  StreamedHttpResponse,
--- a/x-pack/plugins/inference/scripts/evaluation/evaluation_client.ts
+++ b/x-pack/plugins/inference/scripts/evaluation/evaluation_client.ts
@ -7,8 +7,7 @@

 import { remove } from 'lodash';
 import { lastValueFrom } from 'rxjs';
-import type { OutputAPI } from '../../common/output';
-import { withoutOutputUpdateEvents } from '../../common/output/without_output_update_events';
+import { type OutputAPI, withoutOutputUpdateEvents } from '@kbn/inference-common';
 import type { EvaluationResult } from './types';

 export interface InferenceEvaluationClient {
--- a/x-pack/plugins/inference/scripts/evaluation/scenarios/esql/index.spec.ts
+++ b/x-pack/plugins/inference/scripts/evaluation/scenarios/esql/index.spec.ts
@ -8,11 +8,12 @@
 /// <reference types="@kbn/ambient-ftr-types"/>

 import expect from '@kbn/expect';
+import type { Logger } from '@kbn/logging';
 import { firstValueFrom, lastValueFrom, filter } from 'rxjs';
+import { isOutputCompleteEvent } from '@kbn/inference-common';
 import { naturalLanguageToEsql } from '../../../../server/tasks/nl_to_esql';
 import { chatClient, evaluationClient, logger } from '../../services';
 import { EsqlDocumentBase } from '../../../../server/tasks/nl_to_esql/doc_base';
-import { isOutputCompleteEvent } from '../../../../common';

 interface TestCase {
  title: string;
@ -40,7 +41,7 @@ const callNaturalLanguageToEsql = async (question: string) => {
        debug: (source) => {
          logger.debug(typeof source === 'function' ? source() : source);
        },
-      },
+      } as Logger,
    })
  );
 };
--- a/x-pack/plugins/inference/scripts/load_esql_docs/utils/output_executor.ts
+++ b/x-pack/plugins/inference/scripts/load_esql_docs/utils/output_executor.ts
@ -6,7 +6,7 @@
 */

 import { lastValueFrom } from 'rxjs';
-import type { OutputAPI } from '../../../common/output';
+import type { OutputAPI } from '@kbn/inference-common';

 export interface Prompt {
  system?: string;
--- a/x-pack/plugins/inference/scripts/util/cli_options.ts
+++ b/x-pack/plugins/inference/scripts/util/cli_options.ts
@ -4,6 +4,7 @@
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */
+
 import { format, parse } from 'url';
 import { readKibanaConfig } from './read_kibana_config';

--- a/x-pack/plugins/inference/scripts/util/kibana_client.ts
+++ b/x-pack/plugins/inference/scripts/util/kibana_client.ts
@ -13,18 +13,19 @@ import { from, map, switchMap, throwError } from 'rxjs';
 import { UrlObject, format, parse } from 'url';
 import { inspect } from 'util';
 import { isReadable } from 'stream';
-import type { ChatCompleteAPI, ChatCompletionEvent } from '../../common/chat_complete';
-import { ChatCompleteRequestBody } from '../../common/chat_complete/request';
-import type { InferenceConnector } from '../../common/connectors';
 import {
+  ChatCompleteAPI,
+  OutputAPI,
+  ChatCompletionEvent,
  InferenceTaskError,
  InferenceTaskErrorEvent,
+  InferenceTaskEventType,
  createInferenceInternalError,
-} from '../../common/errors';
-import { InferenceTaskEventType } from '../../common/inference_task';
-import type { OutputAPI } from '../../common/output';
-import { createOutputApi } from '../../common/output/create_output_api';
-import { withoutOutputUpdateEvents } from '../../common/output/without_output_update_events';
+  withoutOutputUpdateEvents,
+} from '@kbn/inference-common';
+import type { ChatCompleteRequestBody } from '../../common/http_apis';
+import type { InferenceConnector } from '../../common/connectors';
+import { createOutputApi } from '../../common/create_output_api';
 import { eventSourceStreamIntoObservable } from '../../server/util/event_source_stream_into_observable';

 // eslint-disable-next-line spaced-comment
--- a/x-pack/plugins/inference/server/chat_complete/adapters/bedrock/bedrock_claude_adapter.test.ts
+++ b/x-pack/plugins/inference/server/chat_complete/adapters/bedrock/bedrock_claude_adapter.test.ts
@ -8,8 +8,7 @@
 import { PassThrough } from 'stream';
 import { loggerMock } from '@kbn/logging-mocks';
 import type { InferenceExecutor } from '../../utils/inference_executor';
-import { MessageRole } from '../../../../common/chat_complete';
-import { ToolChoiceType } from '../../../../common/chat_complete/tools';
+import { MessageRole, ToolChoiceType } from '@kbn/inference-common';
 import { bedrockClaudeAdapter } from './bedrock_claude_adapter';
 import { addNoToolUsageDirective } from './prompts';

--- a/x-pack/plugins/inference/server/chat_complete/adapters/bedrock/bedrock_claude_adapter.ts
+++ b/x-pack/plugins/inference/server/chat_complete/adapters/bedrock/bedrock_claude_adapter.ts
@ -7,10 +7,15 @@

 import { filter, from, map, switchMap, tap } from 'rxjs';
 import { Readable } from 'stream';
+import {
+  Message,
+  MessageRole,
+  createInferenceInternalError,
+  ToolChoiceType,
+  ToolSchemaType,
+  type ToolOptions,
+} from '@kbn/inference-common';
 import { parseSerdeChunkMessage } from './serde_utils';
-import { Message, MessageRole } from '../../../../common/chat_complete';
-import { createInferenceInternalError } from '../../../../common/errors';
-import { ToolChoiceType, type ToolOptions } from '../../../../common/chat_complete/tools';
 import { InferenceConnectorAdapter } from '../../types';
 import type { BedRockMessage, BedrockToolChoice } from './types';
 import {
@ -19,7 +24,6 @@ import {
 } from './serde_eventstream_into_observable';
 import { processCompletionChunks } from './process_completion_chunks';
 import { addNoToolUsageDirective } from './prompts';
-import { ToolSchemaType } from '../../../../common/chat_complete/tool_schema';

 export const bedrockClaudeAdapter: InferenceConnectorAdapter = {
  chatComplete: ({ executor, system, messages, toolChoice, tools }) => {
--- a/x-pack/plugins/inference/server/chat_complete/adapters/bedrock/process_completion_chunks.ts
+++ b/x-pack/plugins/inference/server/chat_complete/adapters/bedrock/process_completion_chunks.ts
@ -11,7 +11,7 @@ import {
  ChatCompletionTokenCountEvent,
  ChatCompletionChunkToolCall,
  ChatCompletionEventType,
-} from '../../../../common/chat_complete';
+} from '@kbn/inference-common';
 import type { CompletionChunk, MessageStopChunk } from './types';

 export function processCompletionChunks() {
--- a/x-pack/plugins/inference/server/chat_complete/adapters/bedrock/serde_eventstream_into_observable.ts
+++ b/x-pack/plugins/inference/server/chat_complete/adapters/bedrock/serde_eventstream_into_observable.ts
@ -11,7 +11,7 @@ import { identity } from 'lodash';
 import { Observable } from 'rxjs';
 import { Readable } from 'stream';
 import { Message } from '@smithy/types';
-import { createInferenceInternalError } from '../../../../common/errors';
+import { createInferenceInternalError } from '@kbn/inference-common';

 interface ModelStreamErrorException {
  name: 'ModelStreamErrorException';
--- a/x-pack/plugins/inference/server/chat_complete/adapters/gemini/gemini_adapter.test.ts
+++ b/x-pack/plugins/inference/server/chat_complete/adapters/gemini/gemini_adapter.test.ts
@ -11,8 +11,7 @@ import { noop, tap, lastValueFrom, toArray, Subject } from 'rxjs';
 import { loggerMock } from '@kbn/logging-mocks';
 import type { InferenceExecutor } from '../../utils/inference_executor';
 import { observableIntoEventSourceStream } from '../../../util/observable_into_event_source_stream';
-import { MessageRole } from '../../../../common/chat_complete';
-import { ToolChoiceType } from '../../../../common/chat_complete/tools';
+import { MessageRole, ToolChoiceType } from '@kbn/inference-common';
 import { geminiAdapter } from './gemini_adapter';

 describe('geminiAdapter', () => {
--- a/x-pack/plugins/inference/server/chat_complete/adapters/gemini/gemini_adapter.ts
+++ b/x-pack/plugins/inference/server/chat_complete/adapters/gemini/gemini_adapter.ts
@ -8,10 +8,15 @@
 import * as Gemini from '@google/generative-ai';
 import { from, map, switchMap } from 'rxjs';
 import { Readable } from 'stream';
+import {
+  Message,
+  MessageRole,
+  ToolChoiceType,
+  ToolOptions,
+  ToolSchema,
+  ToolSchemaType,
+} from '@kbn/inference-common';
 import type { InferenceConnectorAdapter } from '../../types';
-import { Message, MessageRole } from '../../../../common/chat_complete';
-import { ToolChoiceType, ToolOptions } from '../../../../common/chat_complete/tools';
-import type { ToolSchema, ToolSchemaType } from '../../../../common/chat_complete/tool_schema';
 import { eventSourceStreamIntoObservable } from '../../../util/event_source_stream_into_observable';
 import { processVertexStream } from './process_vertex_stream';
 import type { GenerateContentResponseChunk, GeminiMessage, GeminiToolConfig } from './types';
--- a/x-pack/plugins/inference/server/chat_complete/adapters/gemini/process_vertex_stream.test.ts
+++ b/x-pack/plugins/inference/server/chat_complete/adapters/gemini/process_vertex_stream.test.ts
@ -6,7 +6,7 @@
 */

 import { TestScheduler } from 'rxjs/testing';
-import { ChatCompletionEventType } from '../../../../common/chat_complete';
+import { ChatCompletionEventType } from '@kbn/inference-common';
 import { processVertexStream } from './process_vertex_stream';
 import type { GenerateContentResponseChunk } from './types';

--- a/x-pack/plugins/inference/server/chat_complete/adapters/gemini/process_vertex_stream.ts
+++ b/x-pack/plugins/inference/server/chat_complete/adapters/gemini/process_vertex_stream.ts
@ -10,7 +10,7 @@ import {
  ChatCompletionChunkEvent,
  ChatCompletionTokenCountEvent,
  ChatCompletionEventType,
-} from '../../../../common/chat_complete';
+} from '@kbn/inference-common';
 import { generateFakeToolCallId } from '../../../../common';
 import type { GenerateContentResponseChunk } from './types';

--- a/x-pack/plugins/inference/server/chat_complete/adapters/openai/openai_adapter.test.ts
+++ b/x-pack/plugins/inference/server/chat_complete/adapters/openai/openai_adapter.test.ts
@ -12,7 +12,7 @@ import { pick } from 'lodash';
 import { lastValueFrom, Subject, toArray } from 'rxjs';
 import type { Logger } from '@kbn/logging';
 import { loggerMock } from '@kbn/logging-mocks';
-import { ChatCompletionEventType, MessageRole } from '../../../../common/chat_complete';
+import { ChatCompletionEventType, MessageRole } from '@kbn/inference-common';
 import { observableIntoEventSourceStream } from '../../../util/observable_into_event_source_stream';
 import { InferenceExecutor } from '../../utils/inference_executor';
 import { openAIAdapter } from '.';
--- a/x-pack/plugins/inference/server/chat_complete/adapters/openai/openai_adapter.ts
+++ b/x-pack/plugins/inference/server/chat_complete/adapters/openai/openai_adapter.ts
@ -20,10 +20,10 @@ import {
  ChatCompletionEventType,
  Message,
  MessageRole,
-} from '../../../../common/chat_complete';
-import type { ToolOptions } from '../../../../common/chat_complete/tools';
-import { createTokenLimitReachedError } from '../../../../common/chat_complete/errors';
-import { createInferenceInternalError } from '../../../../common/errors';
+  ToolOptions,
+  createInferenceInternalError,
+} from '@kbn/inference-common';
+import { createTokenLimitReachedError } from '../../errors';
 import { eventSourceStreamIntoObservable } from '../../../util/event_source_stream_into_observable';
 import type { InferenceConnectorAdapter } from '../../types';
 import {
--- a/x-pack/plugins/inference/server/chat_complete/api.ts
+++ b/x-pack/plugins/inference/server/chat_complete/api.ts
@ -9,8 +9,11 @@ import { last } from 'lodash';
 import { defer, switchMap, throwError } from 'rxjs';
 import type { Logger } from '@kbn/logging';
 import type { KibanaRequest } from '@kbn/core-http-server';
-import type { ChatCompleteAPI, ChatCompletionResponse } from '../../common/chat_complete';
-import { createInferenceRequestError } from '../../common/errors';
+import {
+  type ChatCompleteAPI,
+  type ChatCompletionResponse,
+  createInferenceRequestError,
+} from '@kbn/inference-common';
 import type { InferenceStartDependencies } from '../types';
 import { getConnectorById } from '../util/get_connector_by_id';
 import { getInferenceAdapter } from './adapters';
--- a/x-pack/plugins/inference/server/chat_complete/errors.ts
+++ b/x-pack/plugins/inference/server/chat_complete/errors.ts
@ -0,0 +1,51 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import { InferenceTaskError, type UnvalidatedToolCall } from '@kbn/inference-common';
+import { i18n } from '@kbn/i18n';
+import {
+  ChatCompletionErrorCode,
+  ChatCompletionTokenLimitReachedError,
+  ChatCompletionToolNotFoundError,
+  ChatCompletionToolValidationError,
+} from '@kbn/inference-common/src/chat_complete/errors';
+
+/** Creates the `TokenLimitReachedError` task error; both counts go into the message and metadata. */
+export function createTokenLimitReachedError(
+  tokenLimit?: number,
+  tokenCount?: number
+): ChatCompletionTokenLimitReachedError {
+  const meta = { tokenLimit, tokenCount };
+  const message = i18n.translate('xpack.inference.chatCompletionError.tokenLimitReachedError', {
+    defaultMessage: `Token limit reached. Token limit is {tokenLimit}, but the current conversation has {tokenCount} tokens.`,
+    values: { tokenLimit, tokenCount },
+  });
+
+  return new InferenceTaskError(ChatCompletionErrorCode.TokenLimitReachedError, message, meta);
+}
+
+/**
+ * Creates the `ToolNotFoundError` task error for a call to the tool called `name`,
+ * passing that name along as the error's metadata.
+ */
+export function createToolNotFoundError(name: string): ChatCompletionToolNotFoundError {
+  const message = `Tool ${name} called but was not available`;
+
+  return new InferenceTaskError(ChatCompletionErrorCode.ToolNotFoundError, message, { name });
+}
+
+/**
+ * Creates the `ToolValidationError` task error from a caller-supplied message.
+ * The `meta` object is handed to the error unchanged (same reference).
+ */
+export function createToolValidationError(
+  message: string,
+  meta: {
+    name?: string;
+    arguments?: string;
+    errorsText?: string;
+    toolCalls?: UnvalidatedToolCall[];
+  }
+): ChatCompletionToolValidationError {
+  const code = ChatCompletionErrorCode.ToolValidationError;
+
+  return new InferenceTaskError(code, message, meta);
+}
--- a/x-pack/plugins/inference/server/chat_complete/simulated_function_calling/get_system_instructions.ts
+++ b/x-pack/plugins/inference/server/chat_complete/simulated_function_calling/get_system_instructions.ts
@ -5,8 +5,8 @@
 * 2.0.
 */

+import { ToolDefinition } from '@kbn/inference-common';
 import { TOOL_USE_END, TOOL_USE_START } from './constants';
-import { ToolDefinition } from '../../../common/chat_complete/tools';

 export function getSystemMessageInstructions({
  tools,
--- a/x-pack/plugins/inference/server/chat_complete/simulated_function_calling/parse_inline_function_calls.ts
+++ b/x-pack/plugins/inference/server/chat_complete/simulated_function_calling/parse_inline_function_calls.ts
@ -8,11 +8,11 @@
 import { Observable } from 'rxjs';
 import { Logger } from '@kbn/logging';
 import {
+  createInferenceInternalError,
  ChatCompletionChunkEvent,
  ChatCompletionTokenCountEvent,
  ChatCompletionEventType,
-} from '../../../common/chat_complete';
-import { createInferenceInternalError } from '../../../common/errors';
+} from '@kbn/inference-common';
 import { TOOL_USE_END, TOOL_USE_START } from './constants';

 function matchOnSignalStart(buffer: string) {
--- a/x-pack/plugins/inference/server/chat_complete/simulated_function_calling/wrap_with_simulated_function_calling.ts
+++ b/x-pack/plugins/inference/server/chat_complete/simulated_function_calling/wrap_with_simulated_function_calling.ts
@ -5,9 +5,16 @@
 * 2.0.
 */

-import { AssistantMessage, Message, ToolMessage, UserMessage } from '../../../common';
-import { MessageRole } from '../../../common/chat_complete';
-import { ToolChoice, ToolChoiceType, ToolDefinition } from '../../../common/chat_complete/tools';
+import {
+  MessageRole,
+  AssistantMessage,
+  Message,
+  ToolMessage,
+  UserMessage,
+  ToolChoice,
+  ToolChoiceType,
+  ToolDefinition,
+} from '@kbn/inference-common';
 import { TOOL_USE_END, TOOL_USE_START } from './constants';
 import { getSystemMessageInstructions } from './get_system_instructions';

--- a/x-pack/plugins/inference/server/chat_complete/types.ts
+++ b/x-pack/plugins/inference/server/chat_complete/types.ts
@ -12,8 +12,8 @@ import type {
  ChatCompletionTokenCountEvent,
  FunctionCallingMode,
  Message,
-} from '../../common/chat_complete';
-import type { ToolOptions } from '../../common/chat_complete/tools';
+  ToolOptions,
+} from '@kbn/inference-common';
 import type { InferenceExecutor } from './utils';

 /**
--- a/x-pack/plugins/inference/server/chat_complete/utils/chunks_into_message.test.ts
+++ b/x-pack/plugins/inference/server/chat_complete/utils/chunks_into_message.test.ts
@ -4,13 +4,14 @@
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */
+
 import { lastValueFrom, of } from 'rxjs';
 import {
+  ToolChoiceType,
  ChatCompletionChunkEvent,
  ChatCompletionEventType,
  ChatCompletionTokenCountEvent,
-} from '../../../common/chat_complete';
-import { ToolChoiceType } from '../../../common/chat_complete/tools';
+} from '@kbn/inference-common';
 import { chunksIntoMessage } from './chunks_into_message';
 import type { Logger } from '@kbn/logging';

--- a/x-pack/plugins/inference/server/chat_complete/utils/chunks_into_message.ts
+++ b/x-pack/plugins/inference/server/chat_complete/utils/chunks_into_message.ts
@ -7,14 +7,15 @@

 import { last, map, merge, OperatorFunction, scan, share } from 'rxjs';
 import type { Logger } from '@kbn/logging';
-import type { UnvalidatedToolCall, ToolOptions } from '../../../common/chat_complete/tools';
 import {
+  UnvalidatedToolCall,
+  ToolOptions,
  ChatCompletionChunkEvent,
  ChatCompletionEventType,
  ChatCompletionMessageEvent,
  ChatCompletionTokenCountEvent,
-} from '../../../common/chat_complete';
-import { withoutTokenCountEvents } from '../../../common/chat_complete/without_token_count_events';
+  withoutTokenCountEvents,
+} from '@kbn/inference-common';
 import { validateToolCalls } from '../../util/validate_tool_calls';

 export function chunksIntoMessage<TToolOptions extends ToolOptions>({
--- a/x-pack/plugins/inference/server/index.ts
+++ b/x-pack/plugins/inference/server/index.ts
@ -4,25 +4,22 @@
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */
+
 import type { PluginInitializer, PluginInitializerContext } from '@kbn/core/server';
 import type { InferenceConfig } from './config';
-import { InferencePlugin } from './plugin';
 import type {
  InferenceServerSetup,
  InferenceServerStart,
  InferenceSetupDependencies,
  InferenceStartDependencies,
 } from './types';
-
-export { withoutTokenCountEvents } from '../common/chat_complete/without_token_count_events';
-export { withoutChunkEvents } from '../common/chat_complete/without_chunk_events';
-export { withoutOutputUpdateEvents } from '../common/output/without_output_update_events';
+import { InferencePlugin } from './plugin';

 export type { InferenceClient } from './types';
-export { naturalLanguageToEsql } from './tasks/nl_to_esql';
-
 export type { InferenceServerSetup, InferenceServerStart };

+export { naturalLanguageToEsql } from './tasks/nl_to_esql';
+
 export const plugin: PluginInitializer<
  InferenceServerSetup,
  InferenceServerStart,
--- a/x-pack/plugins/inference/server/inference_client/index.ts
+++ b/x-pack/plugins/inference/server/inference_client/index.ts
@ -9,7 +9,7 @@ import type { Logger } from '@kbn/logging';
 import type { KibanaRequest } from '@kbn/core-http-server';
 import type { InferenceClient, InferenceStartDependencies } from '../types';
 import { createChatCompleteApi } from '../chat_complete';
-import { createOutputApi } from '../../common/output/create_output_api';
+import { createOutputApi } from '../../common/create_output_api';
 import { getConnectorById } from '../util/get_connector_by_id';

 export function createInferenceClient({
--- a/x-pack/plugins/inference/server/routes/chat_complete.ts
+++ b/x-pack/plugins/inference/server/routes/chat_complete.ts
@ -7,9 +7,8 @@

 import { schema, Type } from '@kbn/config-schema';
 import type { CoreSetup, IRouter, Logger, RequestHandlerContext } from '@kbn/core/server';
-import { MessageRole } from '../../common/chat_complete';
-import type { ChatCompleteRequestBody } from '../../common/chat_complete/request';
-import { ToolCall, ToolChoiceType } from '../../common/chat_complete/tools';
+import { MessageRole, ToolCall, ToolChoiceType } from '@kbn/inference-common';
+import type { ChatCompleteRequestBody } from '../../common/http_apis';
 import { createInferenceClient } from '../inference_client';
 import { InferenceServerStart, InferenceStartDependencies } from '../types';
 import { observableIntoEventSourceStream } from '../util/observable_into_event_source_stream';
--- a/x-pack/plugins/inference/server/tasks/nl_to_esql/actions/generate_esql.ts
+++ b/x-pack/plugins/inference/server/tasks/nl_to_esql/actions/generate_esql.ts
@ -7,21 +7,23 @@

 import { Observable, map, merge, of, switchMap } from 'rxjs';
 import type { Logger } from '@kbn/logging';
-import { ToolCall, ToolOptions } from '../../../../common/chat_complete/tools';
 import {
-  correctCommonEsqlMistakes,
-  generateFakeToolCallId,
+  ToolCall,
+  ToolOptions,
+  withoutTokenCountEvents,
  isChatCompletionMessageEvent,
  Message,
  MessageRole,
-} from '../../../../common';
-import { InferenceClient, withoutTokenCountEvents } from '../../..';
-import { OutputCompleteEvent, OutputEventType } from '../../../../common/output';
+  OutputCompleteEvent,
+  OutputEventType,
+  FunctionCallingMode,
+} from '@kbn/inference-common';
+import { correctCommonEsqlMistakes, generateFakeToolCallId } from '../../../../common';
+import { InferenceClient } from '../../..';
 import { INLINE_ESQL_QUERY_REGEX } from '../../../../common/tasks/nl_to_esql/constants';
 import { EsqlDocumentBase } from '../doc_base';
 import { requestDocumentationSchema } from './shared';
 import type { NlToEsqlTaskEvent } from '../types';
-import type { FunctionCallingMode } from '../../../../common/chat_complete';

 export const generateEsqlTask = <TToolOptions extends ToolOptions>({
  chatCompleteApi,
--- a/x-pack/plugins/inference/server/tasks/nl_to_esql/actions/request_documentation.ts
+++ b/x-pack/plugins/inference/server/tasks/nl_to_esql/actions/request_documentation.ts
@ -6,11 +6,15 @@
 */

 import { isEmpty } from 'lodash';
-import { InferenceClient, withoutOutputUpdateEvents } from '../../..';
-import { Message } from '../../../../common';
-import { ToolChoiceType, ToolOptions } from '../../../../common/chat_complete/tools';
+import {
+  ToolChoiceType,
+  ToolOptions,
+  Message,
+  withoutOutputUpdateEvents,
+  FunctionCallingMode,
+} from '@kbn/inference-common';
+import { InferenceClient } from '../../..';
 import { requestDocumentationSchema } from './shared';
-import type { FunctionCallingMode } from '../../../../common/chat_complete';

 export const requestDocumentation = ({
  outputApi,
--- a/x-pack/plugins/inference/server/tasks/nl_to_esql/actions/shared.ts
+++ b/x-pack/plugins/inference/server/tasks/nl_to_esql/actions/shared.ts
@ -5,7 +5,7 @@
 * 2.0.
 */

-import { ToolSchema } from '../../../../common';
+import { ToolSchema } from '@kbn/inference-common';

 export const requestDocumentationSchema = {
  type: 'object',
--- a/x-pack/plugins/inference/server/tasks/nl_to_esql/task.ts
+++ b/x-pack/plugins/inference/server/tasks/nl_to_esql/task.ts
@ -7,8 +7,7 @@

 import { once } from 'lodash';
 import { Observable, from, switchMap } from 'rxjs';
-import { Message, MessageRole } from '../../../common/chat_complete';
-import type { ToolOptions } from '../../../common/chat_complete/tools';
+import { Message, MessageRole, ToolOptions } from '@kbn/inference-common';
 import { EsqlDocumentBase } from './doc_base';
 import { requestDocumentation, generateEsqlTask } from './actions';
 import { NlToEsqlTaskParams, NlToEsqlTaskEvent } from './types';
--- a/x-pack/plugins/inference/server/tasks/nl_to_esql/types.ts
+++ b/x-pack/plugins/inference/server/tasks/nl_to_esql/types.ts
@ -11,9 +11,9 @@ import type {
  ChatCompletionMessageEvent,
  FunctionCallingMode,
  Message,
-} from '../../../common/chat_complete';
-import type { ToolOptions } from '../../../common/chat_complete/tools';
-import type { OutputCompleteEvent } from '../../../common/output';
+  ToolOptions,
+  OutputCompleteEvent,
+} from '@kbn/inference-common';
 import type { InferenceClient } from '../../types';

 export type NlToEsqlTaskEvent<TToolOptions extends ToolOptions> =
--- a/x-pack/plugins/inference/server/types.ts
+++ b/x-pack/plugins/inference/server/types.ts
@ -10,9 +10,8 @@ import type {
  PluginSetupContract as ActionsPluginSetup,
 } from '@kbn/actions-plugin/server';
 import type { KibanaRequest } from '@kbn/core-http-server';
-import { ChatCompleteAPI } from '../common/chat_complete';
+import { ChatCompleteAPI, OutputAPI } from '@kbn/inference-common';
 import { InferenceConnector } from '../common/connectors';
-import { OutputAPI } from '../common/output';

 /* eslint-disable @typescript-eslint/no-empty-interface*/

--- a/x-pack/plugins/inference/server/util/get_connector_by_id.ts
+++ b/x-pack/plugins/inference/server/util/get_connector_by_id.ts
@ -6,8 +6,8 @@
 */

 import type { ActionsClient, ActionResult as ActionConnector } from '@kbn/actions-plugin/server';
+import { createInferenceRequestError } from '@kbn/inference-common';
 import { isSupportedConnectorType, type InferenceConnector } from '../../common/connectors';
-import { createInferenceRequestError } from '../../common/errors';

 /**
 * Retrieves a connector given the provided `connectorId` and asserts it's an inference connector
--- a/x-pack/plugins/inference/server/util/observable_into_event_source_stream.test.ts
+++ b/x-pack/plugins/inference/server/util/observable_into_event_source_stream.test.ts
@ -8,7 +8,7 @@
 import { createParser } from 'eventsource-parser';
 import { partition } from 'lodash';
 import { merge, of, throwError } from 'rxjs';
-import type { InferenceTaskEvent } from '../../common/inference_task';
+import type { InferenceTaskEvent } from '@kbn/inference-common';
 import { observableIntoEventSourceStream } from './observable_into_event_source_stream';
 import type { Logger } from '@kbn/logging';

--- a/x-pack/plugins/inference/server/util/observable_into_event_source_stream.ts
+++ b/x-pack/plugins/inference/server/util/observable_into_event_source_stream.ts
@ -9,11 +9,11 @@ import { catchError, map, Observable, of } from 'rxjs';
 import { PassThrough } from 'stream';
 import type { Logger } from '@kbn/logging';
 import {
+  InferenceTaskEventType,
  InferenceTaskErrorCode,
  InferenceTaskErrorEvent,
  isInferenceError,
-} from '../../common/errors';
-import { InferenceTaskEventType } from '../../common/inference_task';
+} from '@kbn/inference-common';

 export function observableIntoEventSourceStream(
  source$: Observable<unknown>,
--- a/x-pack/plugins/inference/server/util/validate_tool_calls.test.ts
+++ b/x-pack/plugins/inference/server/util/validate_tool_calls.test.ts
@ -5,8 +5,7 @@
 * 2.0.
 */

-import { isToolValidationError } from '../../common/chat_complete/errors';
-import { ToolChoiceType } from '../../common/chat_complete/tools';
+import { ToolChoiceType, isToolValidationError } from '@kbn/inference-common';
 import { validateToolCalls } from './validate_tool_calls';

 describe('validateToolCalls', () => {
--- a/x-pack/plugins/inference/server/util/validate_tool_calls.ts
+++ b/x-pack/plugins/inference/server/util/validate_tool_calls.ts
@ -5,16 +5,13 @@
 * 2.0.
 */
 import Ajv from 'ajv';
-import {
-  createToolNotFoundError,
-  createToolValidationError,
-} from '../../common/chat_complete/errors';
 import {
  ToolCallsOf,
  ToolChoiceType,
  ToolOptions,
  UnvalidatedToolCall,
-} from '../../common/chat_complete/tools';
+} from '@kbn/inference-common';
+import { createToolNotFoundError, createToolValidationError } from '../chat_complete/errors';

 export function validateToolCalls<TToolOptions extends ToolOptions>({
  toolCalls,
--- a/x-pack/plugins/inference/tsconfig.json
+++ b/x-pack/plugins/inference/tsconfig.json
@ -19,7 +19,6 @@
  ],
  "kbn_references": [
    "@kbn/i18n",
-    "@kbn/sse-utils",
    "@kbn/esql-ast",
    "@kbn/esql-validation-autocomplete",
    "@kbn/core",
@ -33,6 +32,7 @@
    "@kbn/core-http-server",
    "@kbn/actions-plugin",
    "@kbn/config-schema",
+    "@kbn/inference-common",
    "@kbn/es-types",
    "@kbn/field-types",
    "@kbn/expressions-plugin",
--- a/x-pack/plugins/observability_solution/investigate_app/server/lib/get_sample_documents.ts
+++ b/x-pack/plugins/observability_solution/investigate_app/server/lib/get_sample_documents.ts
@ -7,7 +7,7 @@
 import pLimit from 'p-limit';
 import { estypes } from '@elastic/elasticsearch';
 import { castArray, sortBy, uniq, partition, shuffle } from 'lodash';
-import { truncateList } from '@kbn/inference-plugin/common/util/truncate_list';
+import { truncateList } from '@kbn/inference-plugin/common/utils/truncate_list';
 import { QueryDslQueryContainer } from '@kbn/data-views-plugin/common/types';
 import type { ElasticsearchClient } from '@kbn/core-elasticsearch-server';
 import { rangeQuery, excludeFrozenQuery } from './queries';
--- a/x-pack/plugins/observability_solution/observability_ai_assistant/common/types.ts
+++ b/x-pack/plugins/observability_solution/observability_ai_assistant/common/types.ts
@ -5,7 +5,7 @@
 * 2.0.
 */
 import { IconType } from '@elastic/eui';
-import type { ToolSchema } from '@kbn/inference-plugin/common';
+import type { ToolSchema } from '@kbn/inference-common';
 import type { AssistantScope } from '@kbn/ai-assistant-common';
 import type { ObservabilityAIAssistantChatService } from '../public';
 import type { FunctionResponse } from './functions/types';
--- a/x-pack/plugins/observability_solution/observability_ai_assistant/tsconfig.json
+++ b/x-pack/plugins/observability_solution/observability_ai_assistant/tsconfig.json
@ -14,7 +14,6 @@
  ],
  "kbn_references": [
    "@kbn/i18n",
-    "@kbn/inference-plugin",
    "@kbn/logging",
    "@kbn/kibana-utils-plugin",
    "@kbn/core-analytics-browser",
@ -44,9 +43,9 @@
    "@kbn/serverless",
    "@kbn/core-elasticsearch-server",
    "@kbn/core-ui-settings-server",
-    "@kbn/inference-plugin",
    "@kbn/management-settings-ids",
    "@kbn/ai-assistant-common",
+    "@kbn/inference-common",
  ],
  "exclude": ["target/**/*"]
 }
--- a/x-pack/plugins/observability_solution/observability_ai_assistant_app/common/convert_messages_for_inference.ts
+++ b/x-pack/plugins/observability_solution/observability_ai_assistant_app/common/convert_messages_for_inference.ts
@ -10,8 +10,8 @@ import {
  AssistantMessage,
  Message as InferenceMessage,
  MessageRole as InferenceMessageRole,
-  generateFakeToolCallId,
-} from '@kbn/inference-plugin/common';
+} from '@kbn/inference-common';
+import { generateFakeToolCallId } from '@kbn/inference-plugin/common';

 export function convertMessagesForInference(messages: Message[]): InferenceMessage[] {
  const inferenceMessages: InferenceMessage[] = [];
--- a/x-pack/plugins/observability_solution/observability_ai_assistant_app/server/functions/query/index.ts
+++ b/x-pack/plugins/observability_solution/observability_ai_assistant_app/server/functions/query/index.ts
@ -5,11 +5,8 @@
 * 2.0.
 */

-import {
-  correctCommonEsqlMistakes,
-  isChatCompletionChunkEvent,
-  isOutputEvent,
-} from '@kbn/inference-plugin/common';
+import { isChatCompletionChunkEvent, isOutputEvent } from '@kbn/inference-common';
+import { correctCommonEsqlMistakes } from '@kbn/inference-plugin/common';
 import { naturalLanguageToEsql } from '@kbn/inference-plugin/server';
 import {
  FunctionVisibility,
--- a/x-pack/plugins/observability_solution/observability_ai_assistant_app/tsconfig.json
+++ b/x-pack/plugins/observability_solution/observability_ai_assistant_app/tsconfig.json
@ -69,6 +69,7 @@
    "@kbn/cloud-plugin",
    "@kbn/logs-data-access-plugin",
    "@kbn/ai-assistant-common",
+    "@kbn/inference-common",
  ],
  "exclude": [
    "target/**/*"
--- a/x-pack/plugins/security_solution/server/assistant/tools/esql/nl_to_esql_tool.ts
+++ b/x-pack/plugins/security_solution/server/assistant/tools/esql/nl_to_esql_tool.ts
@ -49,11 +49,7 @@ export const NL_TO_ESQL_TOOL: AssistantTool = {
          connectorId,
          input: question,
          ...(isOssModel ? { functionCalling: 'simulated' } : {}),
-          logger: {
-            debug: (source) => {
-              logger.debug(typeof source === 'function' ? source() : source);
-            },
-          },
+          logger,
        })
      );
    };
--- a/yarn.lock
+++ b/yarn.lock
@ -5344,6 +5344,10 @@
  version "0.0.0"
  uid ""

+"@kbn/inference-common@link:x-pack/packages/ai-infra/inference-common":
+  version "0.0.0"
+  uid ""
+
 "@kbn/inference-plugin@link:x-pack/plugins/inference":
  version "0.0.0"
  uid ""