[9.0] [Obs AI Assistant] Ensure semantic query contribute to score (#213870) (#214084)

# Backport

This will backport the following commits from `main` to `9.0`:
- [[Obs AI Assistant] Ensure semantic query contribute to score
(#213870)](https://github.com/elastic/kibana/pull/213870)

<!--- Backport version: 9.6.6 -->

### Questions?
Please refer to the [Backport tool documentation](https://github.com/sorenlouv/backport).

The original commit message from #213870:

#### Fix

Closes https://github.com/elastic/kibana/issues/213869

Regression introduced in 8.17 (https://github.com/elastic/kibana/pull/200184/).

Semantic queries were incorrectly wrapped in a `filter` context, preventing them from contributing to scoring. This PR removes the `filter` wrapper so that semantic queries run in query context and influence ranking.

This also replaces the `semantic` query with a `match` query. This is necessary because the `fieldCaps` API no longer returns `semantic_text` fields when filtering with `GET animals_kb/_field_caps?fields=*&types=semantic_text`. Instead, we need to retrieve text fields and perform the search against all of them.
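To make the scoring change concrete, here is a minimal TypeScript sketch of the recall query shape before and after the fix. The field name, query text, and boost are illustrative placeholders; the real request builds one clause per text field returned by `fieldCaps`, as shown in the diff below.

```ts
// Minimal sketch: 'text' and 'whales' are illustrative, not the production values.

// Before: the semantic query sat inside `bool.filter`. Filter clauses only
// answer yes/no and contribute a constant 0 to `_score`, so every recalled
// document came back with the same score.
const before = {
  bool: {
    should: [
      { bool: { filter: [{ semantic: { field: 'text', query: 'whales', boost: 1 } }] } },
    ],
    minimum_should_match: 1,
  },
};

// After: a `match` query in query context, so relevance and `boost` influence
// `_score` and the recalled entries can be ranked.
const after = {
  bool: {
    should: [{ match: { text: { query: 'whales', boost: 1 } } }],
    minimum_should_match: 1,
  },
};
```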

Co-authored-by: Søren Louv-Jansen <soren.louv@elastic.co>
Committed by GitHub on behalf of Kibana Machine on 2025-03-12 22:23:36 +11:00 (commit 18cb26cba7, parent d05973112c).
5 changed files with 292 additions and 6 deletions


@@ -46,6 +46,7 @@ interface Dependencies {
export interface RecalledEntry {
  id: string;
  title?: string;
  text: string;
  score: number | null;
  is_correction?: boolean;


@@ -72,24 +72,31 @@ async function recallFromSemanticTextConnectors({
    index: connectorIndices,
    fields: `*`,
    allow_no_indices: true,
-   types: ['semantic_text'],
+   types: ['text'],
    filters: '-metadata,-parent',
  });
- const semanticTextFields = Object.keys(fieldCaps.fields);
- if (!semanticTextFields.length) {
+ const textFields = Object.keys(fieldCaps.fields);
+ if (!textFields.length) {
    logger.debug(`No text fields found in indices: ${connectorIndices}`);
    return [];
  }
- logger.debug(`Semantic text field for search connectors: ${semanticTextFields}`);
+ logger.debug(`Text field for search connectors: ${textFields}`);
  const params = {
    index: connectorIndices,
    size: 20,
    query: {
      bool: {
-       should: semanticTextFields.flatMap((field) => {
+       should: textFields.flatMap((field) => {
          return queries.map(({ text, boost = 1 }) => ({
-           bool: { filter: [{ semantic: { field, query: text, boost } }] },
+           match: {
+             [field]: {
+               query: text,
+               boost,
+             },
+           },
          }));
        }),
        minimum_should_match: 1,
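For context on the `fieldCaps` change in the hunk above, here is a hedged sketch using the Elasticsearch JavaScript client. The standalone client, the `listRecallableTextFields` helper name, and the example return value are assumptions for illustration; the plugin makes the same call through its own Elasticsearch client. Per the PR description, filtering on `types: ['semantic_text']` no longer returns these fields, so the code now requests the `text` family and issues one `match` clause per returned field.

```ts
import { Client } from '@elastic/elasticsearch';

// Illustrative standalone client; the plugin uses its own scoped client instead.
const es = new Client({ node: 'http://localhost:9200' });

// Hypothetical helper mirroring the arguments used in the hunk above.
async function listRecallableTextFields(connectorIndices: string | string[]): Promise<string[]> {
  const fieldCaps = await es.fieldCaps({
    index: connectorIndices,
    fields: '*',
    allow_no_indices: true,
    types: ['text'], // request the text family rather than semantic_text
    filters: '-metadata,-parent', // skip connector bookkeeping fields, as in the diff
  });
  return Object.keys(fieldCaps.fields); // e.g. ['title', 'text'] for the test index
}
```

The implicit assumption, as I read it, is that in the Elasticsearch versions targeted here `semantic_text` fields are surfaced through the `text` family and accept `match` queries, so semantic recall is preserved while the query now contributes to `_score`; the diff itself does not state this, so treat it as an inference.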


@@ -0,0 +1,251 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import expect from '@kbn/expect';
import { AI_ASSISTANT_KB_INFERENCE_ID } from '@kbn/observability-ai-assistant-plugin/server/service/inference_endpoint';
import { first, uniq } from 'lodash';
import type { DeploymentAgnosticFtrProviderContext } from '../../../../../ftr_provider_context';
import {
  clearKnowledgeBase,
  deleteInferenceEndpoint,
  deleteKnowledgeBaseModel,
  importTinyElserModel,
  setupKnowledgeBase,
  waitForKnowledgeBaseReady,
} from '../../knowledge_base/helpers';
import { setAdvancedSettings } from '../../utils/advanced_settings';

const customSearchConnectorIndex = 'animals_kb';

export default function ApiTest({ getService }: DeploymentAgnosticFtrProviderContext) {
  const observabilityAIAssistantAPIClient = getService('observabilityAIAssistantApi');
  const es = getService('es');
  const ml = getService('ml');

  describe('recall', function () {
    before(async () => {
      await addSampleDocsToInternalKb(getService);
      await addSampleDocsToCustomIndex(getService);
    });

    after(async () => {
      await deleteKnowledgeBaseModel(ml);
      await deleteInferenceEndpoint({ es });
      await clearKnowledgeBase(es);

      // clear custom index
      await es.indices.delete({ index: customSearchConnectorIndex }, { ignore: [404] });
    });

    describe('GET /internal/observability_ai_assistant/functions/recall', () => {
      it('produces unique scores for each doc', async () => {
        const entries = await recall('What happened during the database outage?');
        const uniqueScores = uniq(entries.map(({ score }) => score));
        expect(uniqueScores.length).to.be.greaterThan(1);
        expect(uniqueScores.length).to.be(8);
      });

      it('returns results from both search connectors and internal kb', async () => {
        const entries = await recall('What happened during the database outage?');
        const docTypes = uniq(entries.map(({ id }) => id.split('_')[0]));
        expect(docTypes).to.eql(['animal', 'technical']);
      });

      it('returns entries in a consistent order', async () => {
        const entries = await recall('whales');
        expect(entries.map(({ id, score }) => `${formatScore(score!)} - ${id}`)).to.eql([
          'high - animal_whale_migration_patterns',
          'low - animal_elephants_social_structure',
          'low - technical_api_gateway_timeouts',
          'low - technical_cache_misses_thirdparty_api',
          'low - animal_cheetah_life_speed',
          'low - technical_db_outage_slow_queries',
          'low - animal_giraffe_habitat_feeding',
          'low - animal_penguin_antarctic_adaptations',
        ]);
      });

      it('returns the "Cheetah" entry from search connectors as the top result', async () => {
        const entries = await recall('Cheetah');
        const { text, score } = first(entries)!;

        // search connector entries have their entire doc stringified in `text` field
        const parsedDoc = JSON.parse(text) as { title: string; text: string };
        expect(parsedDoc.title).to.eql('The Life of a Cheetah');
        expect(score).to.greaterThan(0.1);
      });

      it('returns different result order for different queries', async () => {
        const databasePromptEntries = await recall('What happened during the database outage?');
        const animalPromptEntries = await recall('Do you have knowledge about animals?');

        expect(databasePromptEntries.length).to.be(8);
        expect(animalPromptEntries.length).to.be(8);

        expect(databasePromptEntries.map(({ id }) => id)).not.to.eql(
          animalPromptEntries.map(({ id }) => id)
        );
      });
    });
  });

  async function recall(prompt: string) {
    const { body, status } = await observabilityAIAssistantAPIClient.editor({
      endpoint: 'POST /internal/observability_ai_assistant/functions/recall',
      params: {
        body: {
          queries: [{ text: prompt }],
        },
      },
    });
    expect(status).to.be(200);
    return body.entries;
  }
}

async function addSampleDocsToInternalKb(
  getService: DeploymentAgnosticFtrProviderContext['getService']
) {
  const log = getService('log');
  const ml = getService('ml');
  const retry = getService('retry');
  const observabilityAIAssistantAPIClient = getService('observabilityAIAssistantApi');

  const sampleDocs = [
    {
      id: 'technical_db_outage_slow_queries',
      title: 'Database Outage: Slow Query Execution',
      text: 'At 03:15 AM UTC, the production database experienced a significant outage, leading to slow query execution and increased response times across multiple services. A surge in database load was detected, with 90% of queries exceeding 2 seconds. A detailed log analysis pointed to locking issues within the transaction queue and inefficient index usage.',
    },
    {
      id: 'technical_api_gateway_timeouts',
      title: 'Service Timeout: API Gateway Bottleneck',
      text: 'At 10:45 AM UTC, the API Gateway encountered a timeout issue, causing a 500 error for all incoming requests. Detailed traces indicated a significant bottleneck at the gateway level, where requests stalled while waiting for upstream service responses. The upstream service was overwhelmed due to a sudden spike in inbound traffic and failed to release resources promptly.',
    },
    {
      id: 'technical_cache_misses_thirdparty_api',
      title: 'Cache Misses and Increased Latency: Third-Party API Failure',
      text: 'At 04:30 PM UTC, a dramatic increase in cache misses and latency was observed. The failure of a third-party API prevented critical data from being cached, leading to unnecessary re-fetching of resources from external sources. This caused significant delays in response times, with up to 10-second delays in some key services.',
    },
  ];

  await importTinyElserModel(ml);
  await setupKnowledgeBase(observabilityAIAssistantAPIClient);
  await waitForKnowledgeBaseReady({ observabilityAIAssistantAPIClient, log, retry });

  await observabilityAIAssistantAPIClient.editor({
    endpoint: 'POST /internal/observability_ai_assistant/kb/entries/import',
    params: {
      body: {
        entries: sampleDocs,
      },
    },
  });
}

async function addSampleDocsToCustomIndex(
  getService: DeploymentAgnosticFtrProviderContext['getService']
) {
  const es = getService('es');
  const supertest = getService('supertest');
  const log = getService('log');

  const sampleDocs = [
    {
      id: 'animal_elephants_social_structure',
      title: 'Elephants and Their Social Structure',
      text: 'Elephants are highly social animals that live in matriarchal herds led by the oldest female. These animals communicate through low-frequency sounds, called infrasound, that travel long distances. They are known for their intelligence, strong memory, and deep emotional bonds with each other.',
    },
    {
      id: 'animal_cheetah_life_speed',
      title: 'The Life of a Cheetah',
      text: 'Cheetahs are the fastest land animals, capable of reaching speeds up to 60 miles per hour in short bursts. They rely on their speed to catch prey, such as gazelles. Unlike other big cats, cheetahs cannot roar, but they make distinctive chirping sounds, especially when communicating with their cubs.',
    },
    {
      id: 'animal_whale_migration_patterns',
      title: 'Whales and Their Migration Patterns',
      text: 'Whales are known for their long migration patterns, traveling thousands of miles between feeding and breeding grounds.',
    },
    {
      id: 'animal_giraffe_habitat_feeding',
      title: 'Giraffes: Habitat and Feeding Habits',
      text: 'Giraffes are the tallest land animals, with long necks that help them reach leaves high up in trees. They live in savannas and grasslands, where they feed on leaves, twigs, and fruits from acacia trees.',
    },
    {
      id: 'animal_penguin_antarctic_adaptations',
      title: 'Penguins and Their Antarctic Adaptations',
      text: 'Penguins are flightless birds that have adapted to life in the cold Antarctic environment. They have a thick layer of blubber to keep warm, and their wings have evolved into flippers for swimming in the icy waters.',
    },
  ];

  // create index with semantic_text mapping for `text` field
  log.info('Creating custom index with sample animal docs...');
  await es.indices.create({
    index: customSearchConnectorIndex,
    mappings: {
      properties: {
        title: { type: 'text' },
        text: { type: 'semantic_text', inference_id: AI_ASSISTANT_KB_INFERENCE_ID },
      },
    },
  });

  log.info('Indexing sample animal docs...');

  // ingest sampleDocs
  await Promise.all(
    sampleDocs.map(async (doc) => {
      const { id, ...restDoc } = doc;
      return es.index({
        refresh: 'wait_for',
        index: customSearchConnectorIndex,
        id,
        body: restDoc,
      });
    })
  );

  // update the advanced settings (`observability:aiAssistantSearchConnectorIndexPattern`) to include the custom index
  await setAdvancedSettings(supertest, {
    'observability:aiAssistantSearchConnectorIndexPattern': customSearchConnectorIndex,
  });
}

function formatScore(score: number) {
  if (score > 0.5) {
    return 'high';
  }

  if (score > 0.1) {
    return 'medium';
  }

  return 'low';
}

// Clear data before running tests
// this is useful for debugging purposes
// @ts-ignore
async function clearBefore(getService: DeploymentAgnosticFtrProviderContext['getService']) {
  const log = getService('log');
  const ml = getService('ml');
  const es = getService('es');

  await deleteKnowledgeBaseModel(ml).catch(() => {
    log.error('Failed to delete knowledge base model');
  });
  await deleteInferenceEndpoint({ es }).catch(() => {
    log.error('Failed to delete inference endpoint');
  });
  await clearKnowledgeBase(es).catch(() => {
    log.error('Failed to clear knowledge base');
  });
  await es.indices.delete({ index: customSearchConnectorIndex }, { ignore: [404] }).catch(() => {
    log.error('Failed to clear custom index');
  });
}


@@ -21,6 +21,7 @@ export default function aiAssistantApiIntegrationTests({
    loadTestFile(require.resolve('./complete/functions/execute_query.spec.ts'));
    loadTestFile(require.resolve('./complete/functions/elasticsearch.spec.ts'));
    loadTestFile(require.resolve('./complete/functions/summarize.spec.ts'));
+   loadTestFile(require.resolve('./complete/functions/recall.spec.ts'));
    loadTestFile(require.resolve('./public_complete/public_complete.spec.ts'));
    loadTestFile(require.resolve('./knowledge_base/knowledge_base_setup.spec.ts'));
    loadTestFile(


@@ -0,0 +1,26 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import {
  ELASTIC_HTTP_VERSION_HEADER,
  X_ELASTIC_INTERNAL_ORIGIN_REQUEST,
} from '@kbn/core-http-common';
import type SuperTest from 'supertest';

export const setAdvancedSettings = async (
  supertest: SuperTest.Agent,
  settings: Record<string, string[] | string | number | boolean>
) => {
  return supertest
    .post('/internal/kibana/settings')
    .set('kbn-xsrf', 'true')
    .set(ELASTIC_HTTP_VERSION_HEADER, '1')
    .set(X_ELASTIC_INTERNAL_ORIGIN_REQUEST, 'kibana')
    .send({ changes: settings })
    .expect(200);
};