[Obs AI Assistant] [Anonymization] Update system prompt to inform about anonymization (#224211)

Closes https://github.com/elastic/observability-dev/issues/4563

## Summary

This PR updates the Obs AI Assistant system prompt to explicitly inform the LLM about the presence of anonymized entities (e.g., hashes or placeholder tokens). The goal is to prevent the LLM from attempting to interpret or hallucinate the meaning of these anonymized tokens.

### What Changed

- Modified the system prompt to include a new instruction:

```txt
Some entities in this conversation (like names, locations, or IDs) have been anonymized using placeholder hashes (e.g., `PER_123`, `LOC_abcd1234`). These tokens should be treated as distinct but semantically unknown entities. Do not try to infer their meaning. Refer to them as-is unless explicitly provided with a description.
```

- This instruction is now included in all prompts sent to the LLM as part of the chat completion setup when at least one anonymization rule is enabled.
2025-06-27 10:40:07 -04:00 · 2025-06-22 21:11:18 +02:00 · 2025-06-22 21:11:18 +02:00 · e2a833785b
commit e2a833785b
parent 9201bad4e1
6 changed files with 44 additions and 3 deletions
--- a/x-pack/platform/plugins/shared/observability_ai_assistant/common/utils/anonymization/redaction.ts
+++ b/x-pack/platform/plugins/shared/observability_ai_assistant/common/utils/anonymization/redaction.ts
@ -8,7 +8,7 @@
 import { DetectedEntity } from '../../types';

 /** Regex matching anonymization placeholders: an uppercase entity class, an underscore, then a 40-hex-char object hash (e.g. `PER_<hash>`) */
-export const HASH_REGEX = /[0-9a-f]{40}/g;
+export const HASH_REGEX = /\b[A-Z]+_[0-9a-f]{40}\b/g;

 /** Default model ID for named entity recognition */
 export const NER_MODEL_ID = 'elastic__distilbert-base-uncased-finetuned-conll03-english';
@ -22,7 +22,8 @@ export function redactEntities(original: string, entities: DetectedEntity[]): st
    .slice()
    .sort((a, b) => b.start_pos - a.start_pos)
    .forEach((e) => {
-      redacted = redacted.slice(0, e.start_pos) + e.hash + redacted.slice(e.end_pos);
+      redacted =
+        redacted.slice(0, e.start_pos) + e.class_name + '_' + e.hash + redacted.slice(e.end_pos);
    });

  return redacted;
--- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/routes/functions/route.ts
+++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/routes/functions/route.ts
@ -67,6 +67,8 @@ const getFunctionsRoute = createObservabilityAIAssistantServerRoute({

    const availableFunctionNames = functionDefinitions.map((def) => def.name);

+    const anonymizationService = client.getAnonymizationService();
+
    return {
      functionDefinitions,
      systemMessage: getSystemMessageFromInstructions({
@ -74,6 +76,7 @@ const getFunctionsRoute = createObservabilityAIAssistantServerRoute({
        kbUserInstructions,
        apiUserInstructions: [],
        availableFunctionNames,
+        anonymizationInstruction: anonymizationService.getAnonymizationInstruction(),
      }),
    };
  },
--- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/anonymization/index.ts
+++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/anonymization/index.ts
@ -110,7 +110,7 @@ export class AnonymizationService {

        // Update hashMap
        entities.forEach((e) => {
-          this.currentHashMap.set(e.hash, {
+          this.currentHashMap.set(e.class_name + '_' + e.hash, {
            value: e.entity,
            class_name: e.class_name,
            type: e.type,
@ -218,4 +218,24 @@ export class AnonymizationService {
      );
    };
  }
+  /**
+   * Reports whether anonymization is active.
+   *
+   * @returns `true` when at least one configured rule has `enabled` set, `false` otherwise
+   *          (including when there are no rules at all).
+   */
+  isEnabled(): boolean {
+    return this.rules.some(({ enabled }) => enabled);
+  }
+  /**
+   * Builds the system-prompt paragraph that tells the LLM about anonymization placeholders.
+   *
+   * The text lists one example token per known entity class: the four fixed NER classes
+   * plus the `entityClass` of every enabled regex rule.
+   *
+   * @returns The instruction text, or an empty string when no rule is enabled.
+   */
+  getAnonymizationInstruction(): string {
+    if (!this.isEnabled()) return '';
+
+    const nerClasses = ['PER', 'LOC', 'ORG', 'MISC'];
+    // De-duplicate: several enabled regex rules may share the same entity class, and
+    // repeating it in the prompt adds tokens without adding information.
+    const regexClasses = Array.from(
+      new Set(
+        this.rules
+          .filter((rule) => rule.type === 'regex' && rule.enabled)
+          .map((rule) => (rule as RegexAnonymizationRule).entityClass)
+      )
+    );
+
+    const allClasses = Array.from(new Set([...nerClasses, ...regexClasses]));
+    const exampleTokens = allClasses.map((c) => `\`${c}_abc123\``).join(', ');
+
+    // The optional clause is spliced in *before* the sentence's full stop, so the sentence
+    // is well-formed whether or not custom regex classes exist (previously it ended with a
+    // dangling ", " when there were none).
+    const customTypes = regexClasses.length
+      ? `, as well as custom types like ${regexClasses.join(', ')}`
+      : '';
+
+    return `Some entities in this conversation have been anonymized using placeholder tokens (e.g., ${exampleTokens}).
+  These represent named entities such as people (PER), locations (LOC), organizations (ORG), and miscellaneous types (MISC)${customTypes}.
+  Do not attempt to infer their meaning, type, or real-world identity. Refer to them exactly as they appear unless explicitly resolved or described.`;
+  }
 }
--- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/index.ts
+++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/index.ts
@ -249,6 +249,8 @@ export class ObservabilityAIAssistantClient {
            availableFunctionNames: disableFunctions
              ? []
              : functionClient.getFunctions().map((fn) => fn.definition.name),
+            anonymizationInstruction:
+              this.dependencies.anonymizationService.getAnonymizationInstruction(),
          })
        ),
        shareReplay()
@ -876,4 +878,8 @@ export class ObservabilityAIAssistantClient {
      this.dependencies.user
    );
  };
+
+  /**
+   * Exposes the anonymization service held in this client's dependencies.
+   *
+   * @returns The `anonymizationService` instance from `this.dependencies`.
+   */
+  getAnonymizationService = () => this.dependencies.anonymizationService;
 }
--- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/util/get_system_message_from_instructions.test.ts
+++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/util/get_system_message_from_instructions.test.ts
@ -17,6 +17,7 @@ describe('getSystemMessageFromInstructions', () => {
        kbUserInstructions: [],
        apiUserInstructions: [],
        availableFunctionNames: [],
+        anonymizationInstruction: '',
      })
    ).toEqual(`first\n\nsecond`);
  });
@ -33,6 +34,7 @@ describe('getSystemMessageFromInstructions', () => {
        kbUserInstructions: [],
        apiUserInstructions: [],
        availableFunctionNames: ['myFunction'],
+        anonymizationInstruction: '',
      })
    ).toEqual(`first\n\nmyFunction`);
  });
@ -49,6 +51,7 @@ describe('getSystemMessageFromInstructions', () => {
          },
        ],
        availableFunctionNames: [],
+        anonymizationInstruction: '',
      })
    ).toEqual(`first\n\n${USER_INSTRUCTIONS_HEADER}\n\nsecond from adhoc instruction`);
  });
@ -60,6 +63,7 @@ describe('getSystemMessageFromInstructions', () => {
        kbUserInstructions: [{ id: 'second', text: 'second_kb' }],
        apiUserInstructions: [],
        availableFunctionNames: [],
+        anonymizationInstruction: '',
      })
    ).toEqual(`first\n\n${USER_INSTRUCTIONS_HEADER}\n\nsecond_kb`);
  });
@ -76,6 +80,7 @@ describe('getSystemMessageFromInstructions', () => {
        kbUserInstructions: [],
        apiUserInstructions: [],
        availableFunctionNames: [],
+        anonymizationInstruction: '',
      })
    ).toEqual(`first`);
  });
--- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/util/get_system_message_from_instructions.ts
+++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/util/get_system_message_from_instructions.ts
@ -27,11 +27,14 @@ export function getSystemMessageFromInstructions({
  // instructions provided by the user via the API. These will be displayed after the application instructions and only if they fit within the token budget
  apiUserInstructions,
  availableFunctionNames,
+  // instructions for anonymization
+  anonymizationInstruction,
 }: {
  applicationInstructions: InstructionOrCallback[];
  kbUserInstructions: Instruction[];
  apiUserInstructions: Instruction[];
  availableFunctionNames: string[];
+  anonymizationInstruction: string;
 }): string {
  const allApplicationInstructions = compact(
    applicationInstructions.flatMap((instruction) => {
@ -55,6 +58,9 @@ export function getSystemMessageFromInstructions({

    // user instructions
    ...(allUserInstructions.length ? [USER_INSTRUCTIONS_HEADER, ...allUserInstructions] : []),
+
+    // anonymization instructions
+    ...(anonymizationInstruction ? [anonymizationInstruction] : []),
  ]
    .map((instruction) => {
      return typeof instruction === 'string' ? instruction : instruction.text;