[Obs AI Assistant] Improve error handling in the evaluation framework (#212991)

Closes https://github.com/elastic/obs-ai-assistant-team/issues/196

## Summary

This PR implements the following:
- Slightly increase the backoff delay for `429` errors
- Improve `convertMessagesForInference` to surface errors related to
function calls
- Improve the KB retrieval scenario criteria

### Checklist

- [x] The PR description includes the appropriate Release Notes section,
and the correct `release_note:*` label is applied per the
[guidelines](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process)
This commit is contained in:
Viduni Wickramarachchi 2025-03-06 19:59:44 -05:00 committed by GitHub
parent 0210468548
commit 6bb27b0400
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 31 additions and 11 deletions

View file

@ -11,9 +11,28 @@ import {
MessageRole as InferenceMessageRole,
} from '@kbn/inference-common';
import { generateFakeToolCallId } from '@kbn/inference-plugin/common';
import type { Logger } from '@kbn/logging';
import { Message, MessageRole } from '.';
export function convertMessagesForInference(messages: Message[]): InferenceMessage[] {
/**
 * Parses the JSON-encoded arguments of a function call without ever throwing.
 *
 * @param jsonString - Raw arguments string taken from the function call; may be
 * undefined or empty when the call carries no arguments.
 * @param logger - Receives an error entry when the arguments are not valid JSON.
 * @returns The parsed value, or an empty object when the arguments are missing,
 * empty, or not valid JSON.
 */
function safeJsonParse(jsonString: string | undefined, logger: Pick<Logger, 'error'>) {
  // Missing or empty arguments simply mean "no arguments". `JSON.parse('')`
  // throws, so short-circuit here instead of logging a spurious parse error.
  if (jsonString == null || jsonString === '') {
    return {};
  }

  try {
    return JSON.parse(jsonString);
  } catch (error) {
    logger.error(
      `Failed to parse function call arguments when converting messages for inference: ${error}`
    );
    // If the LLM returns invalid JSON, it is likely because it is hallucinating
    // the function. We don't want to propagate the error about invalid JSON here.
    // Any errors related to the function call will be caught when the function and
    // its arguments are validated.
    return {};
  }
}
export function convertMessagesForInference(
messages: Message[],
logger: Pick<Logger, 'error'>
): InferenceMessage[] {
const inferenceMessages: InferenceMessage[] = [];
messages.forEach((message) => {
@ -27,7 +46,7 @@ export function convertMessagesForInference(messages: Message[]): InferenceMessa
{
function: {
name: message.message.function_call.name,
arguments: JSON.parse(message.message.function_call.arguments || '{}'),
arguments: safeJsonParse(message.message.function_call.arguments, logger),
},
toolCallId: generateFakeToolCallId(),
},

View file

@ -486,15 +486,14 @@ export class ObservabilityAIAssistantClient {
const options = {
connectorId,
system: systemMessage,
messages: convertMessagesForInference(messages),
messages: convertMessagesForInference(messages, this.dependencies.logger),
toolChoice,
tools,
functionCalling: (simulateFunctionCalling ? 'simulated' : 'auto') as FunctionCallingMode,
};
this.dependencies.logger.debug(
() =>
`Calling inference client with for name: "${name}" with options: ${JSON.stringify(options)}`
() => `Calling inference client for name: "${name}" with options: ${JSON.stringify(options)}`
);
if (stream) {

View file

@ -328,10 +328,10 @@ export class KibanaClient {
}
if (error.message.includes('Status code: 429')) {
that.log.info(`429, backing off 20s`);
return timer(20000);
that.log.info(`429, backing off 30s`);
return timer(30000);
}
that.log.info(`Retrying in 5s`);
return timer(5000);
},

View file

@ -96,7 +96,7 @@ describe('Knowledge base', () => {
const conversation = await chatClient.complete({ messages: prompt });
const result = await chatClient.evaluate(conversation, [
'Uses KB retrieval function to find information about the Quantum Revectorization Engine',
'Uses context function response to find information about the Quantum Revectorization Engine',
'Correctly identifies Dr. Eliana Stone at Acme Labs in 2023 as the inventor',
'Accurately describes that it reorders the subatomic structure of materials and can transform silicon wafers into superconductive materials',
'Does not invent unrelated or hallucinated details not present in the KB',
@ -111,7 +111,7 @@ describe('Knowledge base', () => {
const conversation = await chatClient.complete({ messages: prompt });
const result = await chatClient.evaluate(conversation, [
'Uses KB retrieval function to find the correct document about QRE constraints',
'Uses context function response to find the correct document about QRE constraints',
'Mentions the 2 nanometer limit on the revectorization radius',
'Mentions that specialized fusion reactors are needed',
'Does not mention information unrelated to constraints or energy (i.e., does not mention the inventor or silicon wafer transformation from doc-invention-1)',

View file

@ -103,6 +103,7 @@ export function registerQueryFunction({
};
}
);
functions.registerFunction(
{
name: QUERY_FUNCTION_NAME,
@ -129,7 +130,8 @@ export function registerQueryFunction({
connectorId,
messages: convertMessagesForInference(
// remove system message and query function request
messages.filter((message) => message.message.role !== MessageRole.System).slice(0, -1)
messages.filter((message) => message.message.role !== MessageRole.System).slice(0, -1),
resources.logger
),
logger: resources.logger,
tools: Object.fromEntries(