[8.x] [inference] add support for openAI native stream token count (#200745) (#201007)

# Backport

This will backport the following commits from `main` to `8.x`:
- [[inference] add support for openAI native stream token count
(#200745)](https://github.com/elastic/kibana/pull/200745)

<!--- Backport version: 9.4.3 -->

### Questions?
Please refer to the [Backport tool
documentation](https://github.com/sqren/backport)

<!--BACKPORT [{"author":{"name":"Pierre
Gayvallet","email":"pierre.gayvallet@elastic.co"},"sourceCommit":{"committedDate":"2024-11-20T16:53:44Z","message":"[inference]
add support for openAI native stream token count (#200745)\n\n##
Summary\r\n\r\nFix
https://github.com/elastic/kibana/issues/192962\r\n\r\nAdd support for
native openAI token count for streaming APIs.\r\n\r\nThis is done by
adding the `stream_options: {\"include_usage\": true}`\r\nparameter when
`stream: true` is being
used\r\n([doc](https://platform.openai.com/docs/api-reference/chat/create#chat-create-stream_options)),\r\nand
then using the `usage` entry for the last emitted
chunk.\r\n\r\n**Note**: this was done only for the `OpenAI` and
`AzureAI`\r\n[providers](83a701e837/x-pack/plugins/stack_connectors/common/openai/constants.ts (L27-L31)),\r\nand
**not** for the `Other` provider. The reasoning is that not
all\r\nopenAI \"\"\"compatible\"\"\" providers fully support all
options, so I didn't\r\nwant to risk adding a parameter that could cause
some models using an\r\nopenAI adapter to reject the requests. This is
also the reason why I did\r\nnot change the
way\r\n[getTokenCountFromOpenAIStream](8bffd61805/x-pack/plugins/actions/server/lib/get_token_count_from_openai_stream.ts (L15))\r\nfunction,
as we want that to work for all
providers.\r\n\r\n---------\r\n\r\nCo-authored-by: Elastic Machine
<elasticmachine@users.noreply.github.com>","sha":"67171e15c2bd9063059701c4974f76f480ccd538","branchLabelMapping":{"^v9.0.0$":"main","^v8.17.0$":"8.x","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["release_note:skip","v9.0.0","backport:prev-minor","Team:AI
Infra"],"title":"[inference] add support for openAI native stream token
count","number":200745,"url":"https://github.com/elastic/kibana/pull/200745","mergeCommit":{"message":"[inference]
add support for openAI native stream token count (#200745)\n\n##
Summary\r\n\r\nFix
https://github.com/elastic/kibana/issues/192962\r\n\r\nAdd support for
native openAI token count for streaming APIs.\r\n\r\nThis is done by
adding the `stream_options: {\"include_usage\": true}`\r\nparameter when
`stream: true` is being
used\r\n([doc](https://platform.openai.com/docs/api-reference/chat/create#chat-create-stream_options)),\r\nand
then using the `usage` entry for the last emitted
chunk.\r\n\r\n**Note**: this was done only for the `OpenAI` and
`AzureAI`\r\n[providers](83a701e837/x-pack/plugins/stack_connectors/common/openai/constants.ts (L27-L31)),\r\nand
**not** for the `Other` provider. The reasoning is that not
all\r\nopenAI \"\"\"compatible\"\"\" providers fully support all
options, so I didn't\r\nwant to risk adding a parameter that could cause
some models using an\r\nopenAI adapter to reject the requests. This is
also the reason why I did\r\nnot change the
way\r\n[getTokenCountFromOpenAIStream](8bffd61805/x-pack/plugins/actions/server/lib/get_token_count_from_openai_stream.ts (L15))\r\nfunction,
as we want that to work for all
providers.\r\n\r\n---------\r\n\r\nCo-authored-by: Elastic Machine
<elasticmachine@users.noreply.github.com>","sha":"67171e15c2bd9063059701c4974f76f480ccd538"}},"sourceBranch":"main","suggestedTargetBranches":[],"targetPullRequestStates":[{"branch":"main","label":"v9.0.0","branchLabelMappingKey":"^v9.0.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/200745","number":200745,"mergeCommit":{"message":"[inference]
add support for openAI native stream token count (#200745)\n\n##
Summary\r\n\r\nFix
https://github.com/elastic/kibana/issues/192962\r\n\r\nAdd support for
native openAI token count for streaming APIs.\r\n\r\nThis is done by
adding the `stream_options: {\"include_usage\": true}`\r\nparameter when
`stream: true` is being
used\r\n([doc](https://platform.openai.com/docs/api-reference/chat/create#chat-create-stream_options)),\r\nand
then using the `usage` entry for the last emitted
chunk.\r\n\r\n**Note**: this was done only for the `OpenAI` and
`AzureAI`\r\n[providers](83a701e837/x-pack/plugins/stack_connectors/common/openai/constants.ts (L27-L31)),\r\nand
**not** for the `Other` provider. The reasoning is that not
all\r\nopenAI \"\"\"compatible\"\"\" providers fully support all
options, so I didn't\r\nwant to risk adding a parameter that could cause
some models using an\r\nopenAI adapter to reject the requests. This is
also the reason why I did\r\nnot change the
way\r\n[getTokenCountFromOpenAIStream](8bffd61805/x-pack/plugins/actions/server/lib/get_token_count_from_openai_stream.ts (L15))\r\nfunction,
as we want that to work for all
providers.\r\n\r\n---------\r\n\r\nCo-authored-by: Elastic Machine
<elasticmachine@users.noreply.github.com>","sha":"67171e15c2bd9063059701c4974f76f480ccd538"}}]}]
BACKPORT-->
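For context, here is a minimal sketch (not Kibana code, and not part of this backport) of the upstream
OpenAI behaviour the change relies on, using the official `openai` Node client. The model name, prompt,
and the assumption that `OPENAI_API_KEY` is set are placeholders for illustration only.

```ts
import OpenAI from 'openai';

async function main() {
  const client = new OpenAI(); // assumes OPENAI_API_KEY is set in the environment

  const stream = await client.chat.completions.create({
    model: 'gpt-4o',
    messages: [{ role: 'user', content: 'Hello' }],
    stream: true,
    // Without this option the streaming API never reports token usage.
    stream_options: { include_usage: true },
  });

  for await (const chunk of stream) {
    if (chunk.choices.length > 0) {
      process.stdout.write(chunk.choices[0].delta.content ?? '');
    }
    if (chunk.usage) {
      // The final chunk carries an empty `choices` array and the usage counts.
      console.log('\n', chunk.usage); // { prompt_tokens, completion_tokens, total_tokens }
    }
  }
}

main().catch(console.error);
```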

Co-authored-by: Pierre Gayvallet <pierre.gayvallet@elastic.co>
Authored by Kibana Machine on 2024-11-21 05:45:26 +11:00, committed via GitHub
Parent: 43d4730ebd
Commit: 63f1de7e1b
9 changed files with 401 additions and 144 deletions

View file

@@ -61,6 +61,16 @@ describe('getTokenCountFromOpenAIStream', () => {
],
};
const usageChunk = {
object: 'chat.completion.chunk',
choices: [],
usage: {
prompt_tokens: 50,
completion_tokens: 100,
total_tokens: 150,
},
};
const PROMPT_TOKEN_COUNT = 36;
const COMPLETION_TOKEN_COUNT = 5;
@@ -70,55 +80,79 @@ describe('getTokenCountFromOpenAIStream', () => {
});
describe('when a stream completes', () => {
beforeEach(async () => {
stream.write('data: [DONE]');
stream.complete();
});
describe('with usage chunk', () => {
it('returns the counts from the usage chunk', async () => {
stream = createStreamMock();
stream.write(`data: ${JSON.stringify(chunk)}`);
stream.write(`data: ${JSON.stringify(usageChunk)}`);
stream.write('data: [DONE]');
stream.complete();
describe('without function tokens', () => {
beforeEach(async () => {
tokens = await getTokenCountFromOpenAIStream({
responseStream: stream.transform,
logger,
body: JSON.stringify(body),
});
});
it('counts the prompt tokens', () => {
expect(tokens.prompt).toBe(PROMPT_TOKEN_COUNT);
expect(tokens.completion).toBe(COMPLETION_TOKEN_COUNT);
expect(tokens.total).toBe(PROMPT_TOKEN_COUNT + COMPLETION_TOKEN_COUNT);
expect(tokens).toEqual({
prompt: usageChunk.usage.prompt_tokens,
completion: usageChunk.usage.completion_tokens,
total: usageChunk.usage.total_tokens,
});
});
});
describe('with function tokens', () => {
describe('without usage chunk', () => {
beforeEach(async () => {
tokens = await getTokenCountFromOpenAIStream({
responseStream: stream.transform,
logger,
body: JSON.stringify({
...body,
functions: [
{
name: 'my_function',
description: 'My function description',
parameters: {
type: 'object',
properties: {
my_property: {
type: 'boolean',
description: 'My function property',
},
},
},
},
],
}),
stream.write('data: [DONE]');
stream.complete();
});
describe('without function tokens', () => {
beforeEach(async () => {
tokens = await getTokenCountFromOpenAIStream({
responseStream: stream.transform,
logger,
body: JSON.stringify(body),
});
});
it('counts the prompt tokens', () => {
expect(tokens.prompt).toBe(PROMPT_TOKEN_COUNT);
expect(tokens.completion).toBe(COMPLETION_TOKEN_COUNT);
expect(tokens.total).toBe(PROMPT_TOKEN_COUNT + COMPLETION_TOKEN_COUNT);
});
});
it('counts the function tokens', () => {
expect(tokens.prompt).toBeGreaterThan(PROMPT_TOKEN_COUNT);
describe('with function tokens', () => {
beforeEach(async () => {
tokens = await getTokenCountFromOpenAIStream({
responseStream: stream.transform,
logger,
body: JSON.stringify({
...body,
functions: [
{
name: 'my_function',
description: 'My function description',
parameters: {
type: 'object',
properties: {
my_property: {
type: 'boolean',
description: 'My function property',
},
},
},
},
],
}),
});
});
it('counts the function tokens', () => {
expect(tokens.prompt).toBeGreaterThan(PROMPT_TOKEN_COUNT);
});
});
});
});

View file

@@ -25,9 +25,91 @@ export async function getTokenCountFromOpenAIStream({
prompt: number;
completion: number;
}> {
const chatCompletionRequest = JSON.parse(
body
) as OpenAI.ChatCompletionCreateParams.ChatCompletionCreateParamsStreaming;
let responseBody = '';
responseStream.on('data', (chunk: string) => {
responseBody += chunk.toString();
});
try {
await finished(responseStream);
} catch (e) {
logger.error('An error occurred while calculating streaming response tokens');
}
let completionUsage: OpenAI.CompletionUsage | undefined;
const response: ParsedResponse = responseBody
.split('\n')
.filter((line) => {
return line.startsWith('data: ') && !line.endsWith('[DONE]');
})
.map((line) => {
return JSON.parse(line.replace('data: ', ''));
})
.filter((line): line is OpenAI.ChatCompletionChunk => {
return 'object' in line && line.object === 'chat.completion.chunk';
})
.reduce(
(prev, line) => {
if (line.usage) {
completionUsage = line.usage;
}
if (line.choices?.length) {
const msg = line.choices[0].delta!;
prev.content += msg.content || '';
prev.function_call.name += msg.function_call?.name || '';
prev.function_call.arguments += msg.function_call?.arguments || '';
}
return prev;
},
{ content: '', function_call: { name: '', arguments: '' } }
);
// not all openAI compatible providers emit completion chunk, so we still have to support
// manually counting the tokens
if (completionUsage) {
return {
prompt: completionUsage.prompt_tokens,
completion: completionUsage.completion_tokens,
total: completionUsage.total_tokens,
};
} else {
const promptTokens = manuallyCountPromptTokens(body);
const completionTokens = manuallyCountCompletionTokens(response);
return {
prompt: promptTokens,
completion: completionTokens,
total: promptTokens + completionTokens,
};
}
}
interface ParsedResponse {
content: string;
function_call: {
name: string;
arguments: string;
};
}
const manuallyCountCompletionTokens = (response: ParsedResponse) => {
return encode(
JSON.stringify(
omitBy(
{
content: response.content || undefined,
function_call: response.function_call.name ? response.function_call : undefined,
},
isEmpty
)
)
).length;
};
const manuallyCountPromptTokens = (requestBody: string) => {
const chatCompletionRequest: OpenAI.ChatCompletionCreateParams.ChatCompletionCreateParamsStreaming =
JSON.parse(requestBody);
// per https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
const tokensFromMessages = encode(
@@ -60,67 +142,5 @@ export async function getTokenCountFromOpenAIStream({
).length
: 0;
const promptTokens = tokensFromMessages + tokensFromFunctions;
let responseBody: string = '';
responseStream.on('data', (chunk: string) => {
responseBody += chunk.toString();
});
try {
await finished(responseStream);
} catch (e) {
logger.error('An error occurred while calculating streaming response tokens');
}
const response = responseBody
.split('\n')
.filter((line) => {
return line.startsWith('data: ') && !line.endsWith('[DONE]');
})
.map((line) => {
return JSON.parse(line.replace('data: ', ''));
})
.filter(
(
line
): line is {
choices: Array<{
delta: { content?: string; function_call?: { name?: string; arguments: string } };
}>;
} => {
return (
'object' in line && line.object === 'chat.completion.chunk' && line.choices.length > 0
);
}
)
.reduce(
(prev, line) => {
const msg = line.choices[0].delta!;
prev.content += msg.content || '';
prev.function_call.name += msg.function_call?.name || '';
prev.function_call.arguments += msg.function_call?.arguments || '';
return prev;
},
{ content: '', function_call: { name: '', arguments: '' } }
);
const completionTokens = encode(
JSON.stringify(
omitBy(
{
content: response.content || undefined,
function_call: response.function_call.name ? response.function_call : undefined,
},
isEmpty
)
)
).length;
return {
prompt: promptTokens,
completion: completionTokens,
total: promptTokens + completionTokens,
};
}
return tokensFromMessages + tokensFromFunctions;
};
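To make the parsing above easier to follow, here is a sketch of the raw event-stream body the function
consumes, mirroring the chunks used in the test file above (the token numbers are placeholders): a run of
content chunks, then, for providers that honour `include_usage`, a usage-only chunk, then the `[DONE]`
sentinel. When the usage-only chunk is absent, the function falls back to counting tokens manually with
the encoder.

```ts
// Each frame is a `data: ` line in the SSE response body.
const sseBody = [
  `data: ${JSON.stringify({
    object: 'chat.completion.chunk',
    choices: [{ delta: { content: 'Hello' } }],
  })}`,
  `data: ${JSON.stringify({
    object: 'chat.completion.chunk',
    choices: [],
    usage: { prompt_tokens: 50, completion_tokens: 100, total_tokens: 150 },
  })}`,
  'data: [DONE]',
].join('\n');
```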

View file

@@ -21,17 +21,19 @@ function createOpenAIChunk({
delta,
usage,
}: {
delta: OpenAI.ChatCompletionChunk['choices'][number]['delta'];
delta?: OpenAI.ChatCompletionChunk['choices'][number]['delta'];
usage?: OpenAI.ChatCompletionChunk['usage'];
}): OpenAI.ChatCompletionChunk {
return {
choices: [
{
finish_reason: null,
index: 0,
delta,
},
],
choices: delta
? [
{
finish_reason: null,
index: 0,
delta,
},
]
: [],
created: new Date().getTime(),
id: v4(),
model: 'gpt-4o',
@@ -313,7 +315,7 @@ describe('openAIAdapter', () => {
]);
});
it('emits token events', async () => {
it('emits chunk events with tool calls', async () => {
const response$ = openAIAdapter.chatComplete({
...defaultArgs,
messages: [
@@ -375,5 +377,55 @@ describe('openAIAdapter', () => {
},
]);
});
it('emits token count events', async () => {
const response$ = openAIAdapter.chatComplete({
...defaultArgs,
messages: [
{
role: MessageRole.User,
content: 'Hello',
},
],
});
source$.next(
createOpenAIChunk({
delta: {
content: 'chunk',
},
})
);
source$.next(
createOpenAIChunk({
usage: {
prompt_tokens: 50,
completion_tokens: 100,
total_tokens: 150,
},
})
);
source$.complete();
const allChunks = await lastValueFrom(response$.pipe(toArray()));
expect(allChunks).toEqual([
{
type: ChatCompletionEventType.ChatCompletionChunk,
content: 'chunk',
tool_calls: [],
},
{
type: ChatCompletionEventType.ChatCompletionTokenCount,
tokens: {
prompt: 50,
completion: 100,
total: 150,
},
},
]);
});
});
});

View file

@@ -5,7 +5,7 @@
* 2.0.
*/
import OpenAI from 'openai';
import type OpenAI from 'openai';
import type {
ChatCompletionAssistantMessageParam,
ChatCompletionMessageParam,
@@ -13,22 +13,33 @@ import type {
ChatCompletionToolMessageParam,
ChatCompletionUserMessageParam,
} from 'openai/resources';
import { filter, from, map, switchMap, tap, throwError, identity } from 'rxjs';
import { Readable, isReadable } from 'stream';
import {
filter,
from,
identity,
map,
mergeMap,
Observable,
switchMap,
tap,
throwError,
} from 'rxjs';
import { isReadable, Readable } from 'stream';
import {
ChatCompletionChunkEvent,
ChatCompletionEventType,
ChatCompletionTokenCountEvent,
createInferenceInternalError,
Message,
MessageRole,
ToolOptions,
createInferenceInternalError,
} from '@kbn/inference-common';
import { createTokenLimitReachedError } from '../../errors';
import { eventSourceStreamIntoObservable } from '../../../util/event_source_stream_into_observable';
import type { InferenceConnectorAdapter } from '../../types';
import {
wrapWithSimulatedFunctionCalling,
parseInlineFunctionCalls,
wrapWithSimulatedFunctionCalling,
} from '../../simulated_function_calling';
export const openAIAdapter: InferenceConnectorAdapter = {
@@ -92,34 +103,57 @@ export const openAIAdapter: InferenceConnectorAdapter = {
throw createTokenLimitReachedError();
}
}),
filter(
(line): line is OpenAI.ChatCompletionChunk =>
'object' in line && line.object === 'chat.completion.chunk' && line.choices.length > 0
),
map((chunk): ChatCompletionChunkEvent => {
const delta = chunk.choices[0].delta;
return {
type: ChatCompletionEventType.ChatCompletionChunk,
content: delta.content ?? '',
tool_calls:
delta.tool_calls?.map((toolCall) => {
return {
function: {
name: toolCall.function?.name ?? '',
arguments: toolCall.function?.arguments ?? '',
},
toolCallId: toolCall.id ?? '',
index: toolCall.index,
};
}) ?? [],
};
filter((line): line is OpenAI.ChatCompletionChunk => {
return 'object' in line && line.object === 'chat.completion.chunk';
}),
mergeMap((chunk): Observable<ChatCompletionChunkEvent | ChatCompletionTokenCountEvent> => {
const events: Array<ChatCompletionChunkEvent | ChatCompletionTokenCountEvent> = [];
if (chunk.usage) {
events.push(tokenCountFromOpenAI(chunk.usage));
}
if (chunk.choices?.length) {
events.push(chunkFromOpenAI(chunk));
}
return from(events);
}),
simulatedFunctionCalling ? parseInlineFunctionCalls({ logger }) : identity
);
},
};
function chunkFromOpenAI(chunk: OpenAI.ChatCompletionChunk): ChatCompletionChunkEvent {
const delta = chunk.choices[0].delta;
return {
type: ChatCompletionEventType.ChatCompletionChunk,
content: delta.content ?? '',
tool_calls:
delta.tool_calls?.map((toolCall) => {
return {
function: {
name: toolCall.function?.name ?? '',
arguments: toolCall.function?.arguments ?? '',
},
toolCallId: toolCall.id ?? '',
index: toolCall.index,
};
}) ?? [],
};
}
function tokenCountFromOpenAI(
completionUsage: OpenAI.CompletionUsage
): ChatCompletionTokenCountEvent {
return {
type: ChatCompletionEventType.ChatCompletionTokenCount,
tokens: {
completion: completionUsage.completion_tokens,
prompt: completionUsage.prompt_tokens,
total: completionUsage.total_tokens,
},
};
}
function toolsToOpenAI(tools: ToolOptions['tools']): OpenAI.ChatCompletionCreateParams['tools'] {
return tools
? Object.entries(tools).map(([toolName, { description, schema }]) => {

View file

@@ -101,9 +101,50 @@ describe('Azure Open AI Utils', () => {
};
[chatUrl, completionUrl, completionExtensionsUrl].forEach((url: string) => {
const sanitizedBodyString = getRequestWithStreamOption(url, JSON.stringify(body), true);
expect(sanitizedBodyString).toEqual(
`{\"messages\":[{\"role\":\"user\",\"content\":\"This is a test\"}],\"stream\":true}`
);
expect(JSON.parse(sanitizedBodyString)).toEqual({
messages: [{ content: 'This is a test', role: 'user' }],
stream: true,
stream_options: {
include_usage: true,
},
});
});
});
it('sets stream_options when stream is true', () => {
const body = {
messages: [
{
role: 'user',
content: 'This is a test',
},
],
};
[chatUrl, completionUrl, completionExtensionsUrl].forEach((url: string) => {
const sanitizedBodyString = getRequestWithStreamOption(url, JSON.stringify(body), true);
expect(JSON.parse(sanitizedBodyString)).toEqual({
messages: [{ content: 'This is a test', role: 'user' }],
stream: true,
stream_options: {
include_usage: true,
},
});
});
});
it('does not set stream_options when stream is false', () => {
const body = {
messages: [
{
role: 'user',
content: 'This is a test',
},
],
};
[chatUrl, completionUrl, completionExtensionsUrl].forEach((url: string) => {
const sanitizedBodyString = getRequestWithStreamOption(url, JSON.stringify(body), false);
expect(JSON.parse(sanitizedBodyString)).toEqual({
messages: [{ content: 'This is a test', role: 'user' }],
stream: false,
});
});
});
it('overrides stream parameter if defined in body', () => {

View file

@@ -48,6 +48,11 @@ export const getRequestWithStreamOption = (url: string, body: string, stream: bo
const jsonBody = JSON.parse(body);
if (jsonBody) {
jsonBody.stream = stream;
if (stream) {
jsonBody.stream_options = {
include_usage: true,
};
}
}
return JSON.stringify(jsonBody);

View file

@@ -118,6 +118,31 @@ describe('Open AI Utils', () => {
],
};
[OPENAI_CHAT_URL, OPENAI_LEGACY_COMPLETION_URL].forEach((url: string) => {
const sanitizedBodyString = getRequestWithStreamOption(
url,
JSON.stringify(body),
false,
DEFAULT_OPENAI_MODEL
);
expect(JSON.parse(sanitizedBodyString)).toEqual({
messages: [{ content: 'This is a test', role: 'user' }],
model: 'gpt-4',
stream: false,
});
});
});
it('sets stream_options when stream is true', () => {
const body = {
model: 'gpt-4',
messages: [
{
role: 'user',
content: 'This is a test',
},
],
};
[OPENAI_CHAT_URL, OPENAI_LEGACY_COMPLETION_URL].forEach((url: string) => {
const sanitizedBodyString = getRequestWithStreamOption(
url,
@@ -125,9 +150,39 @@ describe('Open AI Utils', () => {
true,
DEFAULT_OPENAI_MODEL
);
expect(sanitizedBodyString).toEqual(
`{\"model\":\"gpt-4\",\"messages\":[{\"role\":\"user\",\"content\":\"This is a test\"}],\"stream\":true}`
expect(JSON.parse(sanitizedBodyString)).toEqual({
messages: [{ content: 'This is a test', role: 'user' }],
model: 'gpt-4',
stream: true,
stream_options: {
include_usage: true,
},
});
});
});
it('does not set stream_options when stream is false', () => {
const body = {
model: 'gpt-4',
messages: [
{
role: 'user',
content: 'This is a test',
},
],
};
[OPENAI_CHAT_URL, OPENAI_LEGACY_COMPLETION_URL].forEach((url: string) => {
const sanitizedBodyString = getRequestWithStreamOption(
url,
JSON.stringify(body),
false,
DEFAULT_OPENAI_MODEL
);
expect(JSON.parse(sanitizedBodyString)).toEqual({
messages: [{ content: 'This is a test', role: 'user' }],
model: 'gpt-4',
stream: false,
});
});
});
@@ -182,6 +237,7 @@ describe('Open AI Utils', () => {
expect(sanitizedBodyString).toEqual(bodyString);
});
});
describe('removeEndpointFromUrl', () => {
test('removes "/chat/completions" from the end of the URL', () => {
const originalUrl = 'https://api.openai.com/v1/chat/completions';

View file

@@ -38,6 +38,11 @@ export const getRequestWithStreamOption = (
if (jsonBody) {
if (APIS_ALLOWING_STREAMING.has(url)) {
jsonBody.stream = stream;
if (stream) {
jsonBody.stream_options = {
include_usage: true,
};
}
}
jsonBody.model = jsonBody.model || defaultModel;
}

View file

@@ -292,6 +292,7 @@ describe('OpenAIConnector', () => {
data: JSON.stringify({
...sampleOpenAiBody,
stream: true,
stream_options: { include_usage: true },
model: DEFAULT_OPENAI_MODEL,
}),
headers: {
@@ -338,6 +339,7 @@ describe('OpenAIConnector', () => {
data: JSON.stringify({
...body,
stream: true,
stream_options: { include_usage: true },
}),
headers: {
Authorization: 'Bearer 123',
@@ -397,6 +399,7 @@ describe('OpenAIConnector', () => {
data: JSON.stringify({
...sampleOpenAiBody,
stream: true,
stream_options: { include_usage: true },
model: DEFAULT_OPENAI_MODEL,
}),
headers: {
@@ -422,6 +425,7 @@ describe('OpenAIConnector', () => {
data: JSON.stringify({
...sampleOpenAiBody,
stream: true,
stream_options: { include_usage: true },
model: DEFAULT_OPENAI_MODEL,
}),
headers: {
@@ -448,6 +452,7 @@ describe('OpenAIConnector', () => {
data: JSON.stringify({
...sampleOpenAiBody,
stream: true,
stream_options: { include_usage: true },
model: DEFAULT_OPENAI_MODEL,
}),
headers: {
@@ -1274,7 +1279,11 @@ describe('OpenAIConnector', () => {
url: 'https://My-test-resource-123.openai.azure.com/openai/deployments/NEW-DEPLOYMENT-321/chat/completions?api-version=2023-05-15',
method: 'post',
responseSchema: StreamingResponseSchema,
data: JSON.stringify({ ...sampleAzureAiBody, stream: true }),
data: JSON.stringify({
...sampleAzureAiBody,
stream: true,
stream_options: { include_usage: true },
}),
headers: {
'api-key': '123',
'content-type': 'application/json',
@@ -1314,6 +1323,7 @@ describe('OpenAIConnector', () => {
data: JSON.stringify({
...body,
stream: true,
stream_options: { include_usage: true },
}),
headers: {
'api-key': '123',