[Security Solution] [AI Assistant] Replace polynomial regular expression with constant time string manipulation (#209314)

## Summary Fixes https://github.com/elastic/kibana-team/issues/1442 This PR replaces a poorly performing regular expression with a constant time string manipulation approach. Context: The regex is used to remove all references from a string when a user copies a message from the assistant and when conversation history is passed to the LLM e.g. ``` "The sky is blue{reference(1234)} and the grass is green{reference(4321)}" -> "The sky is blue and the grass is green" ``` Changes: - Replace the regular expression inside of `removeContentReferences()` - Add tests to verify new logic is correct. - Fix a bug in the contentReference markdown parser that was found by @andrew-goldstein [here](https://github.com/elastic/kibana/pull/209314/files#r1943198510) - For alerts page citations, add a filter for open and acknowledged alerts within the last 24 hours [here](https://github.com/elastic/kibana/pull/209314/files#diff-f17fbe7edfe72943fecbe5ddd8dca6c024a48fe4f90bf4f66650cef16091b769R36) ### How to test new regex: One of the changes in this PR improves the performance of a regex. In real life, no one has ever hit any performance issues with this regex and I don't think it is realistically possible to reach that limit without other things breaking (i.e. the message sent to/returned by the assistant would need to be so large that it would exceed the context window). Therefore, all we can test is that the functionality still works as expected after this change. - Enable the feature flag ```yaml # kibana.dev.yml xpack.securitySolution.enableExperimental: ['contentReferencesEnabled'] ``` - Open the security assistant - Ask the assistant a question about your alerts or a document in your KB. The assistant response should contain citations. - Copy the response to the clipboard using the copy button. 
<img width="785" alt="image" src="https://github.com/user-attachments/assets/edded3a3-8cb9-40a8-918e-a9718e7afc22" /> - Your clipboard should contain the response without any citations ### How to test the alerts page filter - Ask a question about your open alerts and make sure a citation is returned. - Click on the citation - Verify a new tab is opened and the alerts page is visible with a filter for open and acknowledged alerts and a now-24h time window filter. ### Checklist Check that the PR satisfies the following conditions. Reviewers should verify this PR satisfies this list as well. - [X] Any text added follows [EUI's writing guidelines](https://elastic.github.io/eui/#/guidelines/writing), uses sentence case text and includes [i18n support](https://github.com/elastic/kibana/blob/main/src/platform/packages/shared/kbn-i18n/README.md) - [X] [Documentation](https://www.elastic.co/guide/en/kibana/master/development-documentation.html) was added for features that require explanation or tutorials - [X] [Unit or functional tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html) were updated or added to match the most common scenarios - [X] If a plugin configuration key changed, check if it needs to be allowlisted in the cloud and added to the [docker list](https://github.com/elastic/kibana/blob/main/src/dev/build/tasks/os_packages/docker_generator/resources/base/bin/kibana-docker) - [X] This was checked for breaking HTTP API changes, and any breaking changes have been approved by the breaking-change committee. The `release_note:breaking` label should be applied in these situations. 
- [X] [Flaky Test Runner](https://ci-stats.kibana.dev/trigger_flaky_test_runner/1) was used on any tests changed - [X] The PR description includes the appropriate Release Notes section, and the correct `release_note:*` label is applied per the [guidelines](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process) ### Identify risks Does this PR introduce any risks? For example, consider risks like hard to test bugs, performance regression, potential of data loss. Describe the risk, its severity, and mitigation for each identified risk. Invite stakeholders and evaluate how to proceed before merging. - [ ] [See some risk examples](https://github.com/elastic/kibana/blob/main/RISK_MATRIX.mdx) - [ ] ... --------- Co-authored-by: kibanamachine <42973632+kibanamachine@users.noreply.github.com> Co-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com>
2025-04-24 09:48:58 -04:00 · 2025-02-07 09:22:37 +00:00 · 2025-02-07 09:22:37 +00:00 · 2bf8a24c5c
commit 2bf8a24c5c
parent 8831e5b25d
9 changed files with 212 additions and 29 deletions
--- a/x-pack/platform/packages/shared/kbn-elastic-assistant-common/impl/content_references/references/utils.test.ts
+++ b/x-pack/platform/packages/shared/kbn-elastic-assistant-common/impl/content_references/references/utils.test.ts
@ -0,0 +1,42 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import { removeContentReferences } from './utils';
+
+describe('utils', () => { // removeContentReferences: expected outputs per input, plus a runtime bound
+  it.each([
+    ['this has no content references', 'this has no content references'], // nothing to remove → returned unchanged
+    [
+      'The sky is blue{reference(1234)} and the grass is green{reference(4321)}',
+      'The sky is blue and the grass is green',
+    ],
+    ['', ''],
+    ['{reference(1234)}', ''],
+    [' {reference(1234)} ', '  '], // the surrounding spaces are kept
+    ['{reference(1234', '{reference(1234'], // no closing ")}" → left untouched
+    ['{reference(1234)', '{reference(1234)'], // ")" without the final "}" → left untouched
+    ['{reference(1234)}{reference(1234)}{reference(1234)}', ''], // adjacent references are all removed
+    ['{reference(1234)}reference(1234)}{reference(1234)}', 'reference(1234)}'], // middle segment lacks the "{" opener, so it is kept
+  ])('removesContentReferences from "%s"', (input: string, expected: string) => {
+    const result = removeContentReferences(input);
+
+    expect(result).toEqual(expected);
+  });
+
+  // https://github.com/elastic/kibana/security/code-scanning/539
+  it('removesContentReferences does not run in polynomial time', () => {
+    const input = `${'{reference('.repeat(100000)}x${')'.repeat(100000)}`; // 100000 openers followed by "x" and 100000 ")" — the input never contains ")}"
+    const startTime = performance.now(); // Start timing
+
+    removeContentReferences(input);
+
+    const endTime = performance.now(); // End timing
+    const executionTime = endTime - startTime; // Time in milliseconds
+
+    expect(executionTime).toBeLessThan(1000); // Assert under 1 second (wall-clock bound; NOTE(review): may be sensitive to machine load)
+  });
+});
--- a/x-pack/platform/packages/shared/kbn-elastic-assistant-common/impl/content_references/references/utils.ts
+++ b/x-pack/platform/packages/shared/kbn-elastic-assistant-common/impl/content_references/references/utils.ts
@ -46,5 +46,28 @@ export const contentReferenceString = (contentReference: ContentReference) => {
 * @returns content with content references replaced with ''
 */
 export const removeContentReferences = (content: string) => {
-  return content.replaceAll(/\{reference\(.*?\)\}/g, '');
+  let result = ''; // content copied so far, with every complete reference left out
+  let i = 0; // index of the first character that has not yet been copied or skipped
+
+  while (i < content.length) {
+    const start = content.indexOf('{reference(', i);
+    if (start === -1) {
+      // No more "{reference(" → append the rest of the string
+      result += content.slice(i);
+      break;
+    }
+
+    const end = content.indexOf(')}', start); // first ")}" at or after the opener; unlike `.` in the removed regex, this search also crosses line breaks
+    if (end === -1) {
+      // If no closing ")}" is found, treat the rest as normal text
+      result += content.slice(i);
+      break;
+    }
+
+    // Append everything before "{reference(" and skip the matched part
+    result += content.slice(i, start);
+    i = end + 2; // Move index past ")}"
+  }
+
+  return result;
 };
--- a/x-pack/solutions/security/plugins/elastic_assistant/server/lib/prompt/prompts.test.ts
+++ b/x-pack/solutions/security/plugins/elastic_assistant/server/lib/prompt/prompts.test.ts
@ -0,0 +1,31 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import {
+  BEDROCK_SYSTEM_PROMPT,
+  DEFAULT_SYSTEM_PROMPT,
+  GEMINI_SYSTEM_PROMPT,
+  STRUCTURED_SYSTEM_PROMPT,
+} from './prompts';
+
+describe('prompts', () => { // each listed system prompt must contain the given string exactly the expected number of times
+  it.each([
+    [DEFAULT_SYSTEM_PROMPT, '{include_citations_prompt_placeholder}', 1],
+    [GEMINI_SYSTEM_PROMPT, '{include_citations_prompt_placeholder}', 1],
+    [BEDROCK_SYSTEM_PROMPT, '{include_citations_prompt_placeholder}', 1], // the placeholder arrives via the embedded DEFAULT_SYSTEM_PROMPT
+    [STRUCTURED_SYSTEM_PROMPT, '{include_citations_prompt_placeholder}', 1],
+    [DEFAULT_SYSTEM_PROMPT, 'You are a security analyst', 1],
+    [GEMINI_SYSTEM_PROMPT, 'You are an assistant', 1],
+    [BEDROCK_SYSTEM_PROMPT, 'You are a security analyst', 1],
+  ])(
+    '"%s" contains "%s" %s times',
+    (prompt: string, containedString: string, expectedCount: number) => {
+      const regex = new RegExp(containedString, 'g'); // containedString is compiled as a regex pattern, not matched literally; NOTE(review): the placeholder's braces are unescaped — confirm they are treated as literal characters
+      expect((prompt.match(regex) || []).length).toBe(expectedCount); // match() returns null when nothing matches, hence the [] fallback
+    }
+  );
+});
--- a/x-pack/solutions/security/plugins/elastic_assistant/server/lib/prompt/prompts.ts
+++ b/x-pack/solutions/security/plugins/elastic_assistant/server/lib/prompt/prompts.ts
@ -7,20 +7,15 @@

 export const KNOWLEDGE_HISTORY =
  'If available, use the Knowledge History provided to try and answer the question. If not provided, you can try and query for additional knowledge via the KnowledgeBaseRetrievalTool.';
-export const INCLUDE_CITATIONS = `In your response, always include citations using the format: \`{reference(...)}\` when information returned by a tool is used. Only use the reference string provided by the tools and do not create reference strings using other information. The reference should be placed after the punctuation marks.
-  Example citations:
-  \`\`\`
-  Your favourite food is pizza. {reference(HMCxq)}
-  The document was published in 2025. {reference(prSit)}
-  \`\`\``;
+export const INCLUDE_CITATIONS = `\n\nAnnotate your answer with relevant citations. For example: "The sky is blue. {reference(prSit)}"\n\n`;
 export const DEFAULT_SYSTEM_PROMPT = `You are a security analyst and expert in resolving security incidents. Your role is to assist by answering questions about Elastic Security. Do not answer questions unrelated to Elastic Security. ${KNOWLEDGE_HISTORY} {include_citations_prompt_placeholder}`;
 // system prompt from @afirstenberg
 const BASE_GEMINI_PROMPT =
  'You are an assistant that is an expert at using tools and Elastic Security, doing your best to use these tools to answer questions or follow instructions. It is very important to use tools to answer the question or follow the instructions rather than coming up with your own answer. Tool calls are good. Sometimes you may need to make several tool calls to accomplish the task or get an answer to the question that was asked. Use as many tool calls as necessary.';
 const KB_CATCH =
  'If the knowledge base tool gives empty results, do your best to answer the question from the perspective of an expert security analyst.';
-export const GEMINI_SYSTEM_PROMPT = `${BASE_GEMINI_PROMPT} ${KB_CATCH} {include_citations_prompt_placeholder}`;
-export const BEDROCK_SYSTEM_PROMPT = `Use tools as often as possible, as they have access to the latest data and syntax. Never return <thinking> tags in the response, but make sure to include <result> tags content in the response. Do not reflect on the quality of the returned search results in your response. ALWAYS return the exact response from NaturalLanguageESQLTool verbatim in the final response, without adding further description.`;
+export const GEMINI_SYSTEM_PROMPT = `${BASE_GEMINI_PROMPT} {include_citations_prompt_placeholder} ${KB_CATCH}`;
+export const BEDROCK_SYSTEM_PROMPT = `${DEFAULT_SYSTEM_PROMPT} Use tools as often as possible, as they have access to the latest data and syntax. Never return <thinking> tags in the response, but make sure to include <result> tags content in the response. Do not reflect on the quality of the returned search results in your response. ALWAYS return the exact response from NaturalLanguageESQLTool verbatim in the final response, without adding further description.`;
 export const GEMINI_USER_PROMPT = `Now, always using the tools at your disposal, step by step, come up with a response to this request:\n\n`;

 export const STRUCTURED_SYSTEM_PROMPT = `Respond to the human as helpfully and accurately as possible. ${KNOWLEDGE_HISTORY} {include_citations_prompt_placeholder} You have access to the following tools:
--- a/x-pack/solutions/security/plugins/security_solution/public/assistant/get_comments/content_reference/components/content_reference_component_factory.test.tsx
+++ b/x-pack/solutions/security/plugins/security_solution/public/assistant/get_comments/content_reference/components/content_reference_component_factory.test.tsx
@ -14,6 +14,9 @@ import type { ContentReferenceNode } from '../content_reference_parser';
 const testContentReferenceNode = { contentReferenceId: '1' } as ContentReferenceNode;

 jest.mock('../../../../common/lib/kibana', () => ({
+  useNavigation: jest.fn().mockReturnValue({
+    navigateTo: jest.fn(),
+  }),
  useKibana: jest.fn().mockReturnValue({
    services: {
      discover: {
@ -154,4 +157,26 @@ describe('contentReferenceComponentFactory', () => {
    expect(container).toBeEmptyDOMElement();
    expect(screen.queryByText('[1]')).not.toBeInTheDocument();
  });
+
+  it('renders nothing if contentReferenceId is empty string', async () => {
+    const Component = contentReferenceComponentFactory({
+      contentReferences: {
+        '1': {
+          id: '1',
+          type: 'SecurityAlertsPage',
+        },
+      } as ContentReferences,
+      contentReferencesVisible: true, // references are visible, so an empty render is not caused by the visibility flag
+      loading: false,
+    });
+
+    const { container } = render(
+      <Component
+        {...({ contentReferenceId: '', contentReferenceCount: -1 } as ContentReferenceNode)}
+      />
+    );
+
+    expect(container).toBeEmptyDOMElement(); // a node with an empty id produces no DOM output
+    expect(screen.queryByText('[-1]')).not.toBeInTheDocument(); // the -1 count carried by the empty-id node is never displayed
+  });
 });
--- a/x-pack/solutions/security/plugins/security_solution/public/assistant/get_comments/content_reference/components/content_reference_component_factory.tsx
+++ b/x-pack/solutions/security/plugins/security_solution/public/assistant/get_comments/content_reference/components/content_reference_component_factory.tsx
@ -30,6 +30,7 @@ export const contentReferenceComponentFactory = ({
    contentReferenceNode: ContentReferenceNode
  ): React.ReactNode => {
    if (!contentReferencesVisible) return null;
+    if (!contentReferenceNode.contentReferenceId) return null;

    const defaultNode = (
      <ContentReferenceButton
--- a/x-pack/solutions/security/plugins/security_solution/public/assistant/get_comments/content_reference/components/security_alerts_page_reference.tsx
+++ b/x-pack/solutions/security/plugins/security_solution/public/assistant/get_comments/content_reference/components/security_alerts_page_reference.tsx
@ -11,7 +11,8 @@ import { EuiLink } from '@elastic/eui';
 import type { ContentReferenceNode } from '../content_reference_parser';
 import { PopoverReference } from './popover_reference';
 import { SECURITY_ALERTS_PAGE_REFERENCE_LABEL } from './translations';
-import { useKibana } from '../../../../common/lib/kibana';
+import { useNavigateToAlertsPageWithFilters } from '../../../../common/hooks/use_navigate_to_alerts_page_with_filters';
+import { FILTER_OPEN, FILTER_ACKNOWLEDGED } from '../../../../../common/types';

 interface Props {
  contentReferenceNode: ContentReferenceNode;
@ -22,17 +23,22 @@ export const SecurityAlertsPageReference: React.FC<Props> = ({
  contentReferenceNode,
  securityAlertsPageContentReference,
 }) => {
-  const { navigateToApp } = useKibana().services.application;
+  const openAlertsPageWithFilters = useNavigateToAlertsPageWithFilters();

  const onClick = useCallback(
    (e: React.MouseEvent) => {
      e.preventDefault();
-      navigateToApp('security', {
-        path: `alerts`,
-        openInNewTab: true,
-      });
+      openAlertsPageWithFilters(
+        {
+          selectedOptions: [FILTER_OPEN, FILTER_ACKNOWLEDGED], // workflow statuses to pre-select in the filter
+          fieldName: 'kibana.alert.workflow_status',
+          persist: false,
+        },
+        true, // NOTE(review): presumably "open in new tab", mirroring the removed `openInNewTab: true` — confirm against the hook's signature
+        '(global:(timerange:(fromStr:now-24h,kind:relative,toStr:now)))' // relative time range from now-24h to now; NOTE(review): presumably passed on as a URL time-range parameter — confirm
+      );
    },
-    [navigateToApp]
+    [openAlertsPageWithFilters]
  );

  return (
--- a/x-pack/solutions/security/plugins/security_solution/public/assistant/get_comments/content_reference/content_reference_parser.test.ts
+++ b/x-pack/solutions/security/plugins/security_solution/public/assistant/get_comments/content_reference/content_reference_parser.test.ts
@ -11,6 +11,62 @@ import type { Parent } from 'mdast';
 import { ContentReferenceParser } from './content_reference_parser';

 describe('ContentReferenceParser', () => {
+  it('extracts references from poem', async () => {
+    const file = unified().use([[markdown, {}], ContentReferenceParser])
+      .parse(`With a wagging tail and a wet, cold nose,{reference(ccaSI)}
+A furry friend, from head to toes.{reference(ccaSI)}
+Loyal companion, always near,{reference(ccaSI)}
+Chasing squirrels, full of cheer.{reference(ccaSI)}
+A paw to hold, a gentle nudge,
+{reference(ccaSI)}
+A furry alarm, a playful judge.{reference(ccaSI)}
+From golden retrievers to tiny Chihuahuas,{reference(ccaSI)}
+Their love's a gift, that always conquers.{reference(ccaSI)}
+So cherish your dog, with all your might,{reference(ccaSI)}
+Their love's a beacon, shining bright.{reference(ccaSI)}`) as Parent;
+
+    expect(
+      (file.children[0] as Parent).children.filter(
+        (child) => (child.type as string) === 'contentReference'
+      )
+    ).toHaveLength(10); // one contentReference node per {reference(ccaSI)} marker in the poem
+    expect(file.children[0].children).toEqual(
+      expect.arrayContaining([
+        expect.objectContaining({ type: 'text', value: '\nA paw to hold, a gentle nudge,\n' }), // the reference standing alone on the next line is still tokenized, so this text node keeps both line breaks
+      ])
+    );
+  });
+
+  it('extracts reference after linebreak', async () => {
+    const file = unified().use([[markdown, {}], ContentReferenceParser]).parse(`First line
+{reference(FTQJp)}
+`) as Parent;
+
+    expect(file.children[0].children).toEqual(
+      expect.arrayContaining([
+        expect.objectContaining({ type: 'text', value: 'First line\n' }), // the line break stays in the text node instead of being eaten with the reference
+        expect.objectContaining({ type: 'contentReference' }),
+      ])
+    );
+  });
+
+  it('eats empty content reference', async () => {
+    const file = unified()
+      .use([[markdown, {}], ContentReferenceParser])
+      .parse('There is an empty content reference.{reference()}') as Parent;
+
+    expect(file.children[0].children).toEqual(
+      expect.arrayContaining([
+        expect.objectContaining({ type: 'text', value: 'There is an empty content reference.' }), // the "{reference()}" block is consumed, not left in the text
+        expect.objectContaining({
+          type: 'contentReference',
+          contentReferenceCount: -1, // count given to a reference whose id is empty
+          contentReferenceId: '',
+        }),
+      ])
+    );
+  });
+
  it('eats space preceding content reference', async () => {
    const file = unified()
      .use([[markdown, {}], ContentReferenceParser])
--- a/x-pack/solutions/security/plugins/security_solution/public/assistant/get_comments/content_reference/content_reference_parser.ts
+++ b/x-pack/solutions/security/plugins/security_solution/public/assistant/get_comments/content_reference/content_reference_parser.ts
@ -17,9 +17,9 @@ export interface ContentReferenceNode extends Node {
  contentReferenceBlock: ContentReferenceBlock;
 }

-/**
- * Parses `{reference(contentReferenceId)}` or ` {reference(contentReferenceId)}` (notice space prefix) into ContentReferenceNode
- */
+/** Regex source matching `{reference`, optionally preceded by a single space (` {reference`); the opening `(` is not part of the pattern and is checked separately by the tokenizer. */
+const REFERENCE_START_PATTERN = '\\u0020?\\{reference';
+
 export const ContentReferenceParser: Plugin = function ContentReferenceParser() {
  const Parser = this.Parser;
  const tokenizers = Parser.prototype.inlineTokenizers;
@ -33,10 +33,9 @@ export const ContentReferenceParser: Plugin = function ContentReferenceParser()
    value,
    silent
  ) {
-    const [match] = value.match(/^\s?{reference/) || [];
-    if (!match) return false;
+    const [match] = value.match(new RegExp(`^${REFERENCE_START_PATTERN}`)) || [];

-    if (value.includes('\n')) return false;
+    if (!match) return false;

    if (value[match.length] !== '(') return false;

@ -81,10 +80,6 @@ export const ContentReferenceParser: Plugin = function ContentReferenceParser()
      });
    }

-    if (!contentReferenceId) {
-      return false;
-    }
-
    if (silent) {
      return true;
    }
@ -95,6 +90,9 @@ export const ContentReferenceParser: Plugin = function ContentReferenceParser()
    const contentReferenceBlock: ContentReferenceBlock = `{reference(${contentReferenceId})}`;

    const getContentReferenceCount = (id: string) => {
+      if (!id) {
+        return -1;
+      }
      if (id in contentReferenceCounts) {
        return contentReferenceCounts[id];
      }
@ -104,18 +102,24 @@ export const ContentReferenceParser: Plugin = function ContentReferenceParser()

    const toEat = `${match.startsWith(' ') ? ' ' : ''}${contentReferenceBlock}`;

-    return eat(toEat)({
+    const contentReferenceNode: ContentReferenceNode = {
      type: 'contentReference',
      contentReferenceId,
      contentReferenceCount: getContentReferenceCount(contentReferenceId),
      contentReferenceBlock,
-    } as ContentReferenceNode);
+    };
+
+    return eat(toEat)(contentReferenceNode);
  };

  tokenizeCustomCitation.notInLink = true;

  tokenizeCustomCitation.locator = (value, fromIndex) => {
-    return 1 + (value.substring(fromIndex).match(/\s?{reference/)?.index ?? -2);
+    const nextIndex = value.substring(fromIndex).match(new RegExp(REFERENCE_START_PATTERN))?.index; // offset of the next possible reference start, relative to the substring beginning at fromIndex
+    if (nextIndex === undefined) {
+      return -1; // no possible reference start in the remaining text
+    }
+    return nextIndex + 1; // NOTE(review): this is an index into `value` only when fromIndex is 1 — confirm callers always pass 1, otherwise it should be fromIndex + nextIndex
  };

  tokenizers.contentReference = tokenizeCustomCitation;