[Search][Playground] Handle error LLM edge cases (#189509)

## Summary

Fixes:
- sending the error message as part of the conversation. The LLM picks
up on it and reduces the effectiveness of the answer
- clear chat is now enabled when an error has occurred
- better error messages from retriever and LLM

### Checklist

Delete any items that are not applicable to this PR.

- [ ] Any text added follows [EUI's writing
guidelines](https://elastic.github.io/eui/#/guidelines/writing), uses
sentence case text and includes [i18n
support](https://github.com/elastic/kibana/blob/main/packages/kbn-i18n/README.md)
- [ ]
[Documentation](https://www.elastic.co/guide/en/kibana/master/development-documentation.html)
was added for features that require explanation or tutorials
- [x] [Unit or functional
tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)
were updated or added to match the most common scenarios
- [ ] [Flaky Test
Runner](https://ci-stats.kibana.dev/trigger_flaky_test_runner/1) was
used on any tests changed
- [ ] Any UI touched in this PR is usable by keyboard only (learn more
about [keyboard accessibility](https://webaim.org/techniques/keyboard/))
- [ ] Any UI touched in this PR does not create any new axe failures
(run axe in browser:
[FF](https://addons.mozilla.org/en-US/firefox/addon/axe-devtools/),
[Chrome](https://chrome.google.com/webstore/detail/axe-web-accessibility-tes/lhdoppojpmngadmnindnejefpokejbdd?hl=en-US))
- [ ] If a plugin configuration key changed, check if it needs to be
allowlisted in the cloud and added to the [docker
list](https://github.com/elastic/kibana/blob/main/src/dev/build/tasks/os_packages/docker_generator/resources/base/bin/kibana-docker)
- [ ] This renders correctly on smaller devices using a responsive
layout. (You can test this [in your
browser](https://www.browserstack.com/guide/responsive-testing-on-local-server))
- [ ] This was checked for [cross-browser
compatibility](https://www.elastic.co/support/matrix#matrix_browsers)


### Risk Matrix

Delete this section if it is not applicable to this PR.

Before closing this PR, invite QA, stakeholders, and other developers to
identify risks that should be tested prior to the change/feature
release.

When forming the risk matrix, consider some of the following examples
and how they may potentially impact the change:

| Risk | Probability | Severity | Mitigation/Notes |
|---------------------------|-------------|----------|-------------------------|
| Multiple Spaces—unexpected behavior in non-default Kibana Space.
| Low | High | Integration tests will verify that all features are still
supported in non-default Kibana Space and when user switches between
spaces. |
| Multiple nodes—Elasticsearch polling might have race conditions
when multiple Kibana nodes are polling for the same tasks. | High | Low
| Tasks are idempotent, so executing them multiple times will not result
in logical error, but will degrade performance. To test for this case we
add plenty of unit tests around this logic and document manual testing
procedure. |
| Code should gracefully handle cases when feature X or plugin Y are
disabled. | Medium | High | Unit tests will verify that any feature flag
or plugin combination still results in our service operational. |
| [See more potential risk
examples](https://github.com/elastic/kibana/blob/main/RISK_MATRIX.mdx) |


### For maintainers

- [ ] This was checked for breaking API changes and was [labeled
appropriately](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process)
This commit is contained in:
Joe McElroy 2024-07-30 19:48:19 +01:00 committed by GitHub
parent ea620c4163
commit d433b91918
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 62 additions and 20 deletions

View file

@ -56,7 +56,7 @@ export const Chat = () => {
handleSubmit,
getValues,
} = useFormContext<ChatForm>();
const { messages, append, stop: stopRequest, setMessages, reload, error } = useChat();
const { messages, append, stop: stopRequest, setMessages, reload } = useChat();
const messagesRef = useAutoBottomScroll();
const [isRegenerating, setIsRegenerating] = useState<boolean>(false);
const usageTracker = useUsageTracker();
@ -88,8 +88,8 @@ export const Chat = () => {
);
const isToolBarActionsDisabled = useMemo(
() => chatMessages.length <= 1 || !!error || isRegenerating || isSubmitting,
[chatMessages, error, isSubmitting, isRegenerating]
() => chatMessages.length <= 1 || isRegenerating || isSubmitting,
[chatMessages, isSubmitting, isRegenerating]
);
const regenerateMessages = async () => {

View file

@ -305,6 +305,45 @@ describe('conversational chain', () => {
});
}, 10000);
it('should omit the system messages in chat', async () => {
await createTestChain({
responses: ['the final answer'],
chat: [
{
id: '1',
role: MessageRole.user,
content: 'what is the work from home policy?',
},
{
id: '2',
role: MessageRole.system,
content: 'Error occurred. Please try again.',
},
],
expectedFinalAnswer: 'the final answer',
expectedDocs: [
{
documents: [
{ metadata: { _id: '1', _index: 'index' }, pageContent: 'value' },
{ metadata: { _id: '1', _index: 'website' }, pageContent: 'value2' },
],
type: 'retrieved_docs',
},
],
expectedTokens: [
{ type: 'context_token_count', count: 15 },
{ type: 'prompt_token_count', count: 28 },
],
expectedSearchRequest: [
{
method: 'POST',
path: '/index,website/_search',
body: { query: { match: { field: 'what is the work from home policy?' } }, size: 3 },
},
],
});
}, 10000);
it('should cope with quotes in the query', async () => {
await createTestChain({
responses: ['rewrite "the" question', 'the final answer'],

View file

@ -18,7 +18,7 @@ import { createStreamDataTransformer, experimental_StreamData } from 'ai';
import { BaseLanguageModel } from '@langchain/core/language_models/base';
import { BaseMessage } from '@langchain/core/messages';
import { HumanMessage, AIMessage } from '@langchain/core/messages';
import { ChatMessage, MessageRole } from '../types';
import { ChatMessage } from '../types';
import { ElasticsearchRetriever } from './elasticsearch_retriever';
import { renderTemplate } from '../utils/render_template';
@ -49,25 +49,28 @@ interface ContextInputs {
question: string;
}
const getSerialisedMessages = (chatHistory: ChatMessage[]) => {
const getSerialisedMessages = (chatHistory: BaseMessage[]) => {
const formattedDialogueTurns = chatHistory.map((message) => {
if (message.role === MessageRole.user) {
if (message instanceof HumanMessage) {
return `Human: ${message.content}`;
} else if (message.role === MessageRole.assistant) {
} else if (message instanceof AIMessage) {
return `Assistant: ${message.content}`;
}
});
return formattedDialogueTurns.join('\n');
};
const getMessages = (chatHistory: ChatMessage[]) => {
return chatHistory.map((message) => {
if (message.role === 'human') {
return new HumanMessage(message.content);
} else {
return new AIMessage(message.content);
}
});
export const getMessages = (chatHistory: ChatMessage[]) => {
return chatHistory
.map((message) => {
if (message.role === 'human') {
return new HumanMessage(message.content);
} else if (message.role === 'assistant') {
return new AIMessage(message.content);
}
return null;
})
.filter((message): message is BaseMessage => message !== null);
};
const buildContext = (docs: Document[]) => {
@ -141,8 +144,9 @@ class ConversationalChainFn {
const data = new experimental_StreamData();
const messages = msgs ?? [];
const previousMessages = messages.slice(0, -1);
const question = messages[messages.length - 1]!.content;
const lcMessages = getMessages(messages);
const previousMessages = lcMessages.slice(0, -1);
const question = lcMessages[lcMessages.length - 1]!.content;
const retrievedDocs: Document[] = [];
let retrievalChain: Runnable = RunnableLambda.from(() => '');
@ -165,7 +169,7 @@ class ConversationalChainFn {
return input.question;
});
if (previousMessages.length > 0) {
if (lcMessages.length > 1) {
const questionRewritePromptTemplate = PromptTemplate.fromTemplate(
this.options.questionRewritePrompt
);
@ -184,7 +188,6 @@ class ConversationalChainFn {
});
}
const lcMessages = getMessages(messages);
const prompt = ChatPromptTemplate.fromMessages([
SystemMessagePromptTemplate.fromTemplate(this.options.prompt),
...lcMessages,

View file

@ -31,7 +31,7 @@ export function createRetriever(esQuery: string) {
const query = JSON.parse(replacedQuery);
return query;
} catch (e) {
throw Error(e);
throw Error("Failed to parse the Elasticsearch Query. Check Query to make sure it's valid.");
}
};
}