[Obs AI Assistant] [Anonymization] Update system prompt to inform about anonymization (#224211)

Closes https://github.com/elastic/observability-dev/issues/4563
## Summary

This PR updates the Obs AI Assistant system prompt to explicitly inform
the LLM about the presence of anonymized entities (e.g., hashes or
placeholder tokens). The goal is to prevent the LLM from attempting to
interpret or hallucinate the meaning of these anonymized tokens.

## What Changed
- Modified the system prompt to include a new instruction:
```txt
Some entities in this conversation (like names, locations, or IDs) have been anonymized using placeholder hashes (e.g., `PER_123`, `LOC_abcd1234`). These tokens should be treated as distinct but semantically unknown entities. Do not try to infer their meaning. Refer to them as-is unless explicitly provided with a description.
```
- This instruction is now included in all prompts sent to the LLM as
part of the chat completion setup whenever at least one anonymization
rule is enabled.
This commit is contained in:
Arturo Lidueña 2025-06-22 21:11:18 +02:00 committed by GitHub
parent 9201bad4e1
commit e2a833785b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 44 additions and 3 deletions

View file

@ -8,7 +8,7 @@
import { DetectedEntity } from '../../types';
/** Regex matching anonymization placeholders: an upper-case entity class, an underscore, then a 40-hex-char object hash */
export const HASH_REGEX = /[0-9a-f]{40}/g;
export const HASH_REGEX = /\b[A-Z]+_[0-9a-f]{40}\b/g;
/** Default model ID for named entity recognition */
export const NER_MODEL_ID = 'elastic__distilbert-base-uncased-finetuned-conll03-english';
@ -22,7 +22,8 @@ export function redactEntities(original: string, entities: DetectedEntity[]): st
.slice()
.sort((a, b) => b.start_pos - a.start_pos)
.forEach((e) => {
redacted = redacted.slice(0, e.start_pos) + e.hash + redacted.slice(e.end_pos);
redacted =
redacted.slice(0, e.start_pos) + e.class_name + '_' + e.hash + redacted.slice(e.end_pos);
});
return redacted;

View file

@ -67,6 +67,8 @@ const getFunctionsRoute = createObservabilityAIAssistantServerRoute({
const availableFunctionNames = functionDefinitions.map((def) => def.name);
const anonymizationService = client.getAnonymizationService();
return {
functionDefinitions,
systemMessage: getSystemMessageFromInstructions({
@ -74,6 +76,7 @@ const getFunctionsRoute = createObservabilityAIAssistantServerRoute({
kbUserInstructions,
apiUserInstructions: [],
availableFunctionNames,
anonymizationInstruction: anonymizationService.getAnonymizationInstruction(),
}),
};
},

View file

@ -110,7 +110,7 @@ export class AnonymizationService {
// Update hashMap
entities.forEach((e) => {
this.currentHashMap.set(e.hash, {
this.currentHashMap.set(e.class_name + '_' + e.hash, {
value: e.entity,
class_name: e.class_name,
type: e.type,
@ -218,4 +218,24 @@ export class AnonymizationService {
);
};
}
/**
 * Reports whether anonymization is active.
 *
 * @returns `true` when at least one configured rule has `enabled` set, otherwise `false`.
 */
isEnabled(): boolean {
  const hasEnabledRule = this.rules.some(({ enabled }) => enabled);
  return hasEnabledRule;
}
/**
 * Builds the system-prompt instruction that tells the LLM how to treat
 * anonymized placeholder tokens of the form `<CLASS>_<hash>`.
 *
 * The instruction always lists the built-in NER classes and, when enabled
 * regex rules exist, their custom entity classes as well.
 *
 * @returns The instruction text, or an empty string when no rule is enabled.
 */
getAnonymizationInstruction(): string {
  if (!this.isEnabled()) return '';

  const nerClasses = ['PER', 'LOC', 'ORG', 'MISC'];

  // Several regex rules may share an entity class; list each class only once.
  const regexClasses = Array.from(
    new Set(
      this.rules
        .filter((rule) => rule.type === 'regex' && rule.enabled)
        .map((rule) => (rule as RegexAnonymizationRule).entityClass)
    )
  );

  // A custom class may reuse an NER class name, so de-duplicate the combined list too.
  const exampleTokens = Array.from(new Set([...nerClasses, ...regexClasses]))
    .map((entityClass) => `\`${entityClass}_abc123\``)
    .join(', ');

  // Only append the custom-types clause when there is something to list, so the
  // sentence never ends with a dangling comma.
  const customTypesClause = regexClasses.length
    ? `, as well as custom types like ${regexClasses.join(', ')}`
    : '';

  // Joined explicitly so the prompt's whitespace does not depend on source formatting.
  return [
    `Some entities in this conversation have been anonymized using placeholder tokens (e.g., ${exampleTokens}).`,
    `These represent named entities such as people (PER), locations (LOC), organizations (ORG), and miscellaneous types (MISC)${customTypesClause}.`,
    `Do not attempt to infer their meaning, type, or real-world identity. Refer to them exactly as they appear unless explicitly resolved or described.`,
  ].join('\n');
}
}

View file

@ -249,6 +249,8 @@ export class ObservabilityAIAssistantClient {
availableFunctionNames: disableFunctions
? []
: functionClient.getFunctions().map((fn) => fn.definition.name),
anonymizationInstruction:
this.dependencies.anonymizationService.getAnonymizationInstruction(),
})
),
shareReplay()
@ -876,4 +878,8 @@ export class ObservabilityAIAssistantClient {
this.dependencies.user
);
};
/**
 * Exposes the anonymization service held in this client's dependencies.
 *
 * @returns The `anonymizationService` instance from `this.dependencies`.
 */
getAnonymizationService = () => this.dependencies.anonymizationService;
}

View file

@ -17,6 +17,7 @@ describe('getSystemMessageFromInstructions', () => {
kbUserInstructions: [],
apiUserInstructions: [],
availableFunctionNames: [],
anonymizationInstruction: '',
})
).toEqual(`first\n\nsecond`);
});
@ -33,6 +34,7 @@ describe('getSystemMessageFromInstructions', () => {
kbUserInstructions: [],
apiUserInstructions: [],
availableFunctionNames: ['myFunction'],
anonymizationInstruction: '',
})
).toEqual(`first\n\nmyFunction`);
});
@ -49,6 +51,7 @@ describe('getSystemMessageFromInstructions', () => {
},
],
availableFunctionNames: [],
anonymizationInstruction: '',
})
).toEqual(`first\n\n${USER_INSTRUCTIONS_HEADER}\n\nsecond from adhoc instruction`);
});
@ -60,6 +63,7 @@ describe('getSystemMessageFromInstructions', () => {
kbUserInstructions: [{ id: 'second', text: 'second_kb' }],
apiUserInstructions: [],
availableFunctionNames: [],
anonymizationInstruction: '',
})
).toEqual(`first\n\n${USER_INSTRUCTIONS_HEADER}\n\nsecond_kb`);
});
@ -76,6 +80,7 @@ describe('getSystemMessageFromInstructions', () => {
kbUserInstructions: [],
apiUserInstructions: [],
availableFunctionNames: [],
anonymizationInstruction: '',
})
).toEqual(`first`);
});

View file

@ -27,11 +27,14 @@ export function getSystemMessageFromInstructions({
// instructions provided by the user via the API. These will be displayed after the application instructions and only if they fit within the token budget
apiUserInstructions,
availableFunctionNames,
// instructions for anonymization
anonymizationInstruction,
}: {
applicationInstructions: InstructionOrCallback[];
kbUserInstructions: Instruction[];
apiUserInstructions: Instruction[];
availableFunctionNames: string[];
anonymizationInstruction: string;
}): string {
const allApplicationInstructions = compact(
applicationInstructions.flatMap((instruction) => {
@ -55,6 +58,9 @@ export function getSystemMessageFromInstructions({
// user instructions
...(allUserInstructions.length ? [USER_INSTRUCTIONS_HEADER, ...allUserInstructions] : []),
// anonymization instructions
...(anonymizationInstruction ? [anonymizationInstruction] : []),
]
.map((instruction) => {
return typeof instruction === 'string' ? instruction : instruction.text;