[8.15] [Playground] [Bug] Previous messages now sent to LLM (#188123) (#188244)

# Backport

This will backport the following commits from `main` to `8.15`:
- [[Playground] [Bug] Previous messages now sent to LLM
(#188123)](https://github.com/elastic/kibana/pull/188123)

<!--- Backport version: 9.4.3 -->

### Questions?
Please refer to the [Backport tool documentation](https://github.com/sqren/backport).

### Summary (from #188123)

Now that we are using ChatModel, we should be sending previous messages in the conversation history.

This change:
- defines the prompt as a system prompt
- sends all previous conversations into the ChatModel
- sends the question as a separate message
- updates the examples to showcase this flow (see the sketch after this list)
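The resulting message flow, roughly: the configured prompt becomes a system message, prior turns are replayed as chat messages, and the new question is sent as its own human message. The sketch below is a minimal illustration using the LangChain primitives that appear in the diff (`ChatPromptTemplate.fromMessages`, `SystemMessagePromptTemplate`, `HumanMessage`, `AIMessage`); the sample history, prompt text, and variable values are placeholders, not the actual Playground wiring.

```ts
import {
  ChatPromptTemplate,
  SystemMessagePromptTemplate,
  HumanMessagePromptTemplate,
} from '@langchain/core/prompts';
import { HumanMessage, AIMessage } from '@langchain/core/messages';

// Placeholder conversation history; in the Playground this comes from the stored ChatMessage[].
const history = [
  new HumanMessage('what is the work from home policy?'),
  new AIMessage('the final answer'),
];

// System prompt plus all previous turns; the new question travels as its own human message.
const prompt = ChatPromptTemplate.fromMessages([
  SystemMessagePromptTemplate.fromTemplate('you are a QA bot {context}'),
  ...history,
  HumanMessagePromptTemplate.fromTemplate('{question}'),
]);

async function main() {
  // formatMessages resolves the template variables into the message list handed to the ChatModel.
  const messages = await prompt.formatMessages({
    context: 'retrieved documents go here',
    question: 'does the policy apply to contractors?',
  });
  console.log(messages);
}

void main();
```

The same shape appears in `conversational_chain.ts` in the diff below, where `getMessages` converts the stored `ChatMessage[]` into `HumanMessage`/`AIMessage` instances before they are spread into the prompt.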

Co-authored-by: Joe McElroy <joseph.mcelroy@elastic.co>
Commit 98895c5a1b (parent e6c0563419), authored by Kibana Machine on 2024-07-12 19:36:31 +02:00 and committed via GitHub.
9 changed files with 109 additions and 94 deletions

View file

@ -21,8 +21,8 @@ describe('Prompt function', () => {
Instructions:
- Provide an explanation of the process.
- Answer questions truthfully and factually using only the information presented.
- If you don't know the answer, just say that you don't know, don't make up an answer!
- Answer questions truthfully and factually using only the context presented.
- If you don't know the answer, just say that you don't know, don't make up an answer.
- You must always cite the document where the answer was extracted using inline academic citation style [], using the position.
- Use markdown format for code examples.
- You are correct, factual, precise, and reliable.
@ -31,8 +31,7 @@ describe('Prompt function', () => {
Context:
{context}
Question: {question}
Answer:
"
`);
});
@ -45,8 +44,8 @@ describe('Prompt function', () => {
"
<s>[INST]
- Explain the significance of the findings.
- Answer questions truthfully and factually using only the information presented.
- If you don't know the answer, just say that you don't know, don't make up an answer!
- Answer questions truthfully and factually using only the context presented.
- If you don't know the answer, just say that you don't know, don't make up an answer.
- Use markdown format for code examples.
- You are correct, factual, precise, and reliable.
@ -55,10 +54,10 @@ describe('Prompt function', () => {
[INST]
Context:
{context}
Question: {question}
Answer:
[/INST]
"
`);
});
@ -72,7 +71,7 @@ describe('Prompt function', () => {
<instructions>
- Summarize the key points of the article.
- If you don't know the answer, just say that you don't know, don't make up an answer!
- If you don't know the answer, just say that you don't know, don't make up an answer.
- Use markdown format for code examples.
- You are correct, factual, precise, and reliable.
@ -82,7 +81,7 @@ describe('Prompt function', () => {
{context}
</context>
<input>{question}</input>
"
`);
});

View file

@ -5,7 +5,7 @@
* 2.0.
*/
const OpenAIPrompt = (systemInstructions: string) => {
const OpenAIPrompt = (systemInstructions: string, question?: boolean) => {
return `
Instructions:
${systemInstructions}
@ -13,27 +13,26 @@ const OpenAIPrompt = (systemInstructions: string) => {
Context:
{context}
Question: {question}
Answer:
${question ? 'follow up question: {question}' : ''}
`;
};
const MistralPrompt = (systemInstructions: string) => {
const MistralPrompt = (systemInstructions: string, question?: boolean) => {
return `
<s>[INST]${systemInstructions}[/INST] </s>
[INST]
Context:
{context}
Question: {question}
Answer:
[/INST]
${question ? '[INST]follow up question: {question}[/INST]' : ''}
`;
};
// https://docs.anthropic.com/claude/docs/use-xml-tags
const AnthropicPrompt = (systemInstructions: string) => {
const AnthropicPrompt = (systemInstructions: string, question?: boolean) => {
return `
<instructions>${systemInstructions}</instructions>
@ -41,11 +40,11 @@ const AnthropicPrompt = (systemInstructions: string) => {
{context}
</context>
<input>{question}</input>
${question ? '<input>{question}</input>' : ''}
`;
};
const GeminiPrompt = (systemInstructions: string) => {
const GeminiPrompt = (systemInstructions: string, question?: boolean) => {
return `
Instructions:
${systemInstructions}
@ -53,8 +52,8 @@ const GeminiPrompt = (systemInstructions: string) => {
Context:
{context}
Question: {question}
Answer:
${question ? 'follow up question: {question}' : ''}
`;
};
@ -69,10 +68,10 @@ export const Prompt = (instructions: string, options: PromptTemplateOptions): st
- ${instructions}
${
options.context
? '- Answer questions truthfully and factually using only the information presented.'
? '- Answer questions truthfully and factually using only the context presented.'
: ''
}
- If you don't know the answer, just say that you don't know, don't make up an answer!
- If you don't know the answer, just say that you don't know, don't make up an answer.
${
options.citations
? '- You must always cite the document where the answer was extracted using inline academic citation style [], using the position.'
@ -87,7 +86,7 @@ export const Prompt = (instructions: string, options: PromptTemplateOptions): st
mistral: MistralPrompt,
anthropic: AnthropicPrompt,
gemini: GeminiPrompt,
}[options.type || 'openai'](systemInstructions);
}[options.type || 'openai'](systemInstructions, false);
};
interface QuestionRewritePromptOptions {
@ -95,11 +94,11 @@ interface QuestionRewritePromptOptions {
}
export const QuestionRewritePrompt = (options: QuestionRewritePromptOptions): string => {
const systemInstructions = `Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question. Rewrite the question in the question language. Keep the answer to a single sentence. Do not use quotes.`;
const systemInstructions = `Given the following conversation context and a follow up question, rephrase the follow up question to be a standalone question. Rewrite the question in the question language. Keep the answer to a single sentence. Do not use quotes.`;
return {
openai: OpenAIPrompt,
mistral: MistralPrompt,
anthropic: AnthropicPrompt,
gemini: GeminiPrompt,
}[options.type || 'openai'](systemInstructions);
}[options.type || 'openai'](systemInstructions, true);
};
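To make the new `question` flag concrete, here is a rough usage sketch based only on the signatures visible in this hunk: `Prompt` renders a system prompt with no inline question slot (the follow-up question now arrives as a separate chat message), while `QuestionRewritePrompt` keeps the `{question}` placeholder inline because rewriting runs before a standalone question exists. The import path and option values are illustrative assumptions; the full `PromptTemplateOptions` shape is not shown in this diff.

```ts
// Path assumed for illustration; the module exports Prompt and QuestionRewritePrompt as shown above.
import { Prompt, QuestionRewritePrompt } from './prompt';

// System prompt for the answer chain: question=false internally, so no inline {question},
// since the follow-up question is sent to the model as its own chat message.
const answerSystemPrompt = Prompt('Summarize the key points of the article.', {
  type: 'anthropic',
  context: true,
  citations: false,
});

// The rewrite prompt keeps an inline question slot ('<input>{question}</input>' for Anthropic),
// because question rewriting needs the follow-up question embedded in a single prompt string.
const rewriteSystemPrompt = QuestionRewritePrompt({ type: 'anthropic' });

console.log(answerSystemPrompt);
console.log(rewriteSystemPrompt);
```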

View file

@ -7,12 +7,24 @@
export type IndicesQuerySourceFields = Record<string, QuerySourceFields>;
export enum MessageRole {
'user' = 'human',
'assistant' = 'assistant',
'system' = 'system',
}
interface ModelField {
field: string;
model_id: string;
indices: string[];
}
export interface ChatMessage {
id: string;
role: MessageRole;
content: string;
}
interface SemanticField {
field: string;
inferenceId: string;

View file

@ -54,8 +54,8 @@ def create_openai_prompt(question, results):
Instructions:
- Your prompt
- Answer questions truthfully and factually using only the information presented.
- If you don't know the answer, just say that you don't know, don't make up an answer!
- Answer questions truthfully and factually using only the context presented.
- If you don't know the answer, just say that you don't know, don't make up an answer.
- You must always cite the document where the answer was extracted using inline academic citation style [], using the position.
- Use markdown format for code examples.
- You are correct, factual, precise, and reliable.
@ -64,18 +64,17 @@ def create_openai_prompt(question, results):
Context:
{context}
Question: {question}
Answer:
\\"\\"\\"
return prompt
def generate_openai_completion(user_prompt):
def generate_openai_completion(user_prompt, question):
response = openai_client.chat.completions.create(
model=\\"gpt-3.5-turbo\\",
messages=[
{\\"role\\": \\"system\\", \\"content\\": \\"You are an assistant for question-answering tasks.\\"},
{\\"role\\": \\"user\\", \\"content\\": user_prompt},
{\\"role\\": \\"system\\", \\"content\\": user_prompt},
{\\"role\\": \\"user\\", \\"content\\": question},
]
)
@ -84,8 +83,8 @@ def generate_openai_completion(user_prompt):
if __name__ == \\"__main__\\":
question = \\"my question\\"
elasticsearch_results = get_elasticsearch_results(question)
context_prompt = create_openai_prompt(question, elasticsearch_results)
openai_completion = generate_openai_completion(context_prompt)
context_prompt = create_openai_prompt(elasticsearch_results)
openai_completion = generate_openai_completion(context_prompt, question)
print(openai_completion)
"

View file

@ -43,8 +43,8 @@ ANSWER_PROMPT = ChatPromptTemplate.from_template(
Instructions:
- Your prompt
- Answer questions truthfully and factually using only the information presented.
- If you don't know the answer, just say that you don't know, don't make up an answer!
- Answer questions truthfully and factually using only the context presented.
- If you don't know the answer, just say that you don't know, don't make up an answer.
- You must always cite the document where the answer was extracted using inline academic citation style [], using the position.
- Use markdown format for code examples.
- You are correct, factual, precise, and reliable.
@ -53,8 +53,7 @@ ANSWER_PROMPT = ChatPromptTemplate.from_template(
Context:
{context}
Question: {question}
Answer:
\\"\\"\\"
)

View file

@ -58,12 +58,12 @@ def create_openai_prompt(question, results):
return prompt
def generate_openai_completion(user_prompt):
def generate_openai_completion(user_prompt, question):
response = openai_client.chat.completions.create(
model="gpt-3.5-turbo",
messages=[
{"role": "system", "content": "You are an assistant for question-answering tasks."},
{"role": "user", "content": user_prompt},
{"role": "system", "content": user_prompt},
{"role": "user", "content": question},
]
)
@ -72,8 +72,8 @@ def generate_openai_completion(user_prompt):
if __name__ == "__main__":
question = "my question"
elasticsearch_results = get_elasticsearch_results(question)
context_prompt = create_openai_prompt(question, elasticsearch_results)
openai_completion = generate_openai_completion(context_prompt)
context_prompt = create_openai_prompt(elasticsearch_results)
openai_completion = generate_openai_completion(context_prompt, question)
print(openai_completion)
`}

View file

@ -22,7 +22,7 @@ import { TriggersAndActionsUIPublicPluginStart } from '@kbn/triggers-actions-ui-
import { AppMountParameters } from '@kbn/core/public';
import { UsageCollectionStart } from '@kbn/usage-collection-plugin/public';
import type { ConsolePluginStart } from '@kbn/console-plugin/public';
import { ChatRequestData } from '../common/types';
import { ChatRequestData, MessageRole } from '../common/types';
import type { App } from './components/app';
import type { PlaygroundProvider as PlaygroundProviderComponent } from './providers/playground_provider';
import { PlaygroundHeaderDocs } from './components/playground_header_docs';
@ -80,12 +80,6 @@ export interface ChatForm {
[ChatFormFields.queryFields]: { [index: string]: string[] };
}
export enum MessageRole {
'user' = 'human',
'assistant' = 'assistant',
'system' = 'system',
}
export interface Message {
id: string;
content: string | React.ReactNode;

View file

@ -9,9 +9,10 @@ import type { Client } from '@elastic/elasticsearch';
import { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { ChatPromptTemplate } from '@langchain/core/prompts';
import { FakeListChatModel, FakeStreamingLLM } from '@langchain/core/utils/testing';
import { Message, experimental_StreamData } from 'ai';
import { experimental_StreamData } from 'ai';
import { createAssist as Assist } from '../utils/assist';
import { ConversationalChain, clipContext } from './conversational_chain';
import { ChatMessage, MessageRole } from '../types';
describe('conversational chain', () => {
const createTestChain = async ({
@ -28,7 +29,7 @@ describe('conversational chain', () => {
modelLimit,
}: {
responses: string[];
chat: Message[];
chat: ChatMessage[];
expectedFinalAnswer: string;
expectedDocs: any;
expectedTokens: any;
@ -96,8 +97,8 @@ describe('conversational chain', () => {
size: 3,
inputTokensLimit: modelLimit,
},
prompt: 'you are a QA bot {question} {chat_history} {context}',
questionRewritePrompt: 'rewrite question {question} using {chat_history}"',
prompt: 'you are a QA bot {context}',
questionRewritePrompt: 'rewrite question {question} using {context}"',
});
const stream = await conversationalChain.stream(aiClient, chat);
@ -146,7 +147,7 @@ describe('conversational chain', () => {
chat: [
{
id: '1',
role: 'user',
role: MessageRole.user,
content: 'what is the work from home policy?',
},
],
@ -180,7 +181,7 @@ describe('conversational chain', () => {
chat: [
{
id: '1',
role: 'user',
role: MessageRole.user,
content: 'what is the work from home policy?',
},
],
@ -215,7 +216,7 @@ describe('conversational chain', () => {
chat: [
{
id: '1',
role: 'user',
role: MessageRole.user,
content: 'what is the work from home policy?',
},
],
@ -266,17 +267,17 @@ describe('conversational chain', () => {
chat: [
{
id: '1',
role: 'user',
role: MessageRole.user,
content: 'what is the work from home policy?',
},
{
id: '2',
role: 'assistant',
role: MessageRole.assistant,
content: 'the final answer',
},
{
id: '3',
role: 'user',
role: MessageRole.user,
content: 'what is the work from home policy?',
},
],
@ -292,7 +293,7 @@ describe('conversational chain', () => {
],
expectedTokens: [
{ type: 'context_token_count', count: 15 },
{ type: 'prompt_token_count', count: 38 },
{ type: 'prompt_token_count', count: 39 },
],
expectedSearchRequest: [
{
@ -310,17 +311,17 @@ describe('conversational chain', () => {
chat: [
{
id: '1',
role: 'user',
role: MessageRole.user,
content: 'what is the work from home policy?',
},
{
id: '2',
role: 'assistant',
role: MessageRole.assistant,
content: 'the final answer',
},
{
id: '3',
role: 'user',
role: MessageRole.user,
content: 'what is the work from home policy?',
},
],
@ -336,7 +337,7 @@ describe('conversational chain', () => {
],
expectedTokens: [
{ type: 'context_token_count', count: 15 },
{ type: 'prompt_token_count', count: 40 },
{ type: 'prompt_token_count', count: 39 },
],
expectedSearchRequest: [
{
@ -354,17 +355,17 @@ describe('conversational chain', () => {
chat: [
{
id: '1',
role: 'user',
role: MessageRole.user,
content: 'what is the work from home policy?',
},
{
id: '2',
role: 'assistant',
role: MessageRole.assistant,
content: 'the final answer',
},
{
id: '3',
role: 'user',
role: MessageRole.user,
content: 'what is the work from home policy?',
},
],
@ -380,7 +381,7 @@ describe('conversational chain', () => {
],
expectedTokens: [
{ type: 'context_token_count', count: 15 },
{ type: 'prompt_token_count', count: 42 },
{ type: 'prompt_token_count', count: 49 },
],
expectedSearchRequest: [
{
@ -399,17 +400,17 @@ describe('conversational chain', () => {
chat: [
{
id: '1',
role: 'user',
role: MessageRole.user,
content: 'what is the work from home policy?',
},
{
id: '2',
role: 'assistant',
role: MessageRole.assistant,
content: 'the final answer',
},
{
id: '3',
role: 'user',
role: MessageRole.user,
content: 'what is the work from home policy?',
},
],
@ -445,8 +446,8 @@ describe('conversational chain', () => {
],
// Even with body_content of 1000, the token count should be below or equal to model limit of 100
expectedTokens: [
{ type: 'context_token_count', count: 70 },
{ type: 'prompt_token_count', count: 97 },
{ type: 'context_token_count', count: 65 },
{ type: 'prompt_token_count', count: 99 },
],
expectedHasClipped: true,
expectedSearchRequest: [

View file

@ -7,16 +7,18 @@
import { SearchHit } from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import { Document } from '@langchain/core/documents';
import { ChatPromptTemplate, PromptTemplate } from '@langchain/core/prompts';
import {
ChatPromptTemplate,
PromptTemplate,
SystemMessagePromptTemplate,
} from '@langchain/core/prompts';
import { Runnable, RunnableLambda, RunnableSequence } from '@langchain/core/runnables';
import { BytesOutputParser, StringOutputParser } from '@langchain/core/output_parsers';
import {
createStreamDataTransformer,
experimental_StreamData,
Message as VercelChatMessage,
} from 'ai';
import { createStreamDataTransformer, experimental_StreamData } from 'ai';
import { BaseLanguageModel } from '@langchain/core/language_models/base';
import { BaseMessage } from '@langchain/core/messages';
import { HumanMessage, AIMessage } from '@langchain/core/messages';
import { ChatMessage, MessageRole } from '../types';
import { ElasticsearchRetriever } from './elasticsearch_retriever';
import { renderTemplate } from '../utils/render_template';
@ -47,19 +49,27 @@ interface ContextInputs {
question: string;
}
const formatVercelMessages = (chatHistory: VercelChatMessage[]) => {
const getSerialisedMessages = (chatHistory: ChatMessage[]) => {
const formattedDialogueTurns = chatHistory.map((message) => {
if (message.role === 'user') {
if (message.role === MessageRole.user) {
return `Human: ${message.content}`;
} else if (message.role === 'assistant') {
} else if (message.role === MessageRole.assistant) {
return `Assistant: ${message.content}`;
} else {
return `${message.role}: ${message.content}`;
}
});
return formattedDialogueTurns.join('\n');
};
const getMessages = (chatHistory: ChatMessage[]) => {
return chatHistory.map((message) => {
if (message.role === 'human') {
return new HumanMessage(message.content);
} else {
return new AIMessage(message.content);
}
});
};
const buildContext = (docs: Document[]) => {
const serializedDocs = docs.map((doc, i) =>
renderTemplate(
@ -127,7 +137,7 @@ class ConversationalChainFn {
this.options = options;
}
async stream(client: AssistClient, msgs: VercelChatMessage[]) {
async stream(client: AssistClient, msgs: ChatMessage[]) {
const data = new experimental_StreamData();
const messages = msgs ?? [];
@ -136,7 +146,7 @@ class ConversationalChainFn {
const retrievedDocs: Document[] = [];
let retrievalChain: Runnable = RunnableLambda.from(() => '');
const chatHistory = formatVercelMessages(previousMessages);
const chatHistory = getSerialisedMessages(previousMessages);
if (this.options.rag) {
const retriever = new ElasticsearchRetriever({
@ -161,8 +171,7 @@ class ConversationalChainFn {
);
standaloneQuestionChain = RunnableSequence.from([
{
context: () => '',
chat_history: (input) => input.chat_history,
context: (input) => input.chat_history,
question: (input) => input.question,
},
questionRewritePromptTemplate,
@ -175,12 +184,15 @@ class ConversationalChainFn {
});
}
const prompt = ChatPromptTemplate.fromTemplate(this.options.prompt);
const lcMessages = getMessages(messages);
const prompt = ChatPromptTemplate.fromMessages([
SystemMessagePromptTemplate.fromTemplate(this.options.prompt),
...lcMessages,
]);
const answerChain = RunnableSequence.from([
{
context: RunnableSequence.from([(input) => input.question, retrievalChain]),
chat_history: (input) => input.chat_history,
question: (input) => input.question,
},
RunnableLambda.from(clipContext(this.options?.rag?.inputTokensLimit, prompt, data)),