Mirror of https://github.com/elastic/kibana.git (synced 2025-04-23 17:28:26 -04:00)
# [Obs AI Assistant] Improve error handling in the evaluation framework (#212991)
Closes https://github.com/elastic/obs-ai-assistant-team/issues/196

## Summary

This PR implements the following:

- Slightly increase the backoff delay for `429` errors
- Improve `convertMessagesForInference` to surface errors related to function calls
- Improve the KB retrieval scenario criteria

### Checklist

- [x] The PR description includes the appropriate Release Notes section, and the correct `release_note:*` label is applied per the [guidelines](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process)
Commit 6bb27b0400 (parent 0210468548)

5 changed files with 31 additions and 11 deletions
**Message conversion (`convertMessagesForInference`):** function-call arguments returned by the LLM are now parsed defensively; invalid JSON is logged and replaced with an empty object instead of throwing mid-conversion.

```diff
@@ -11,9 +11,28 @@ import {
   MessageRole as InferenceMessageRole,
 } from '@kbn/inference-common';
 import { generateFakeToolCallId } from '@kbn/inference-plugin/common';
+import type { Logger } from '@kbn/logging';
 import { Message, MessageRole } from '.';
 
-export function convertMessagesForInference(messages: Message[]): InferenceMessage[] {
+function safeJsonParse(jsonString: string | undefined, logger: Pick<Logger, 'error'>) {
+  try {
+    return JSON.parse(jsonString ?? '{}');
+  } catch (error) {
+    logger.error(
+      `Failed to parse function call arguments when converting messages for inference: ${error}`
+    );
+    // If the LLM returns invalid JSON, it is likely because it is hallucinating
+    // the function. We don't want to propagate the error about invalid JSON here.
+    // Any errors related to the function call will be caught when the function and
+    // its arguments are validated.
+    return {};
+  }
+}
+
+export function convertMessagesForInference(
+  messages: Message[],
+  logger: Pick<Logger, 'error'>
+): InferenceMessage[] {
   const inferenceMessages: InferenceMessage[] = [];
 
   messages.forEach((message) => {
@@ -27,7 +46,7 @@ export function convertMessagesForInference(messages: Message[]): InferenceMessage[] {
         {
           function: {
             name: message.message.function_call.name,
-            arguments: JSON.parse(message.message.function_call.arguments || '{}'),
+            arguments: safeJsonParse(message.message.function_call.arguments, logger),
           },
           toolCallId: generateFakeToolCallId(),
         },
```
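A minimal sketch of the resulting behavior, assuming `safeJsonParse` were exported for testing (in the diff it is module-private) and using a hypothetical logger stub:

```ts
import type { Logger } from '@kbn/logging';

// Hypothetical stub satisfying Pick<Logger, 'error'>.
const errors: string[] = [];
const logger: Pick<Logger, 'error'> = {
  error: (msg) => void errors.push(String(msg)),
};

// Well-formed arguments parse as before.
safeJsonParse('{"query": "sales by region"}', logger); // => { query: 'sales by region' }

// Hallucinated or truncated arguments no longer throw: the failure is
// logged and `{}` is passed on, so the error surfaces later, when the
// function call and its arguments are validated.
safeJsonParse('{"query": "sales by reg', logger); // => {}

// Missing arguments also fall back to {} via the `?? '{}'` default.
safeJsonParse(undefined, logger); // => {}
```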
**`ObservabilityAIAssistantClient`:** the call site now passes the logger through, and a garbled debug message is cleaned up.

```diff
@@ -486,15 +486,14 @@ export class ObservabilityAIAssistantClient {
     const options = {
       connectorId,
       system: systemMessage,
-      messages: convertMessagesForInference(messages),
+      messages: convertMessagesForInference(messages, this.dependencies.logger),
       toolChoice,
       tools,
       functionCalling: (simulateFunctionCalling ? 'simulated' : 'auto') as FunctionCallingMode,
     };
 
     this.dependencies.logger.debug(
-      () =>
-        `Calling inference client with for name: "${name}" with options: ${JSON.stringify(options)}`
+      () => `Calling inference client for name: "${name}" with options: ${JSON.stringify(options)}`
     );
 
     if (stream) {
```
**`KibanaClient` (evaluation framework):** the backoff for rate-limited (`429`) requests is increased from 20s to 30s.

```diff
@@ -328,10 +328,10 @@ export class KibanaClient {
       }
 
       if (error.message.includes('Status code: 429')) {
-        that.log.info(`429, backing off 20s`);
-
-        return timer(20000);
+        that.log.info(`429, backing off 30s`);
+        return timer(30000);
       }
 
       that.log.info(`Retrying in 5s`);
       return timer(5000);
     },
```
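For context, this delay callback plugs into an RxJS retry pipeline; the sketch below shows roughly how such a function drives retries (the `withBackoff` helper and the retry count are assumptions for illustration, not part of the diff):

```ts
import { Observable, retry, timer } from 'rxjs';

interface Log {
  info: (msg: string) => void;
}

// Hypothetical wrapper mirroring the policy above: back off 30s on
// HTTP 429 (rate limit), 5s on any other error. The retry fires when
// the returned timer emits.
function withBackoff<T>(source$: Observable<T>, log: Log): Observable<T> {
  return source$.pipe(
    retry({
      count: 5, // assumed retry budget; the diff does not show it
      delay: (error: Error) => {
        if (error.message.includes('Status code: 429')) {
          log.info('429, backing off 30s');
          return timer(30000);
        }
        log.info('Retrying in 5s');
        return timer(5000);
      },
    })
  );
}
```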
**Knowledge base evaluation scenarios:** the retrieval criteria now check the context function response rather than the KB retrieval function directly.

```diff
@@ -96,7 +96,7 @@ describe('Knowledge base', () => {
     const conversation = await chatClient.complete({ messages: prompt });
 
     const result = await chatClient.evaluate(conversation, [
-      'Uses KB retrieval function to find information about the Quantum Revectorization Engine',
+      'Uses context function response to find information about the Quantum Revectorization Engine',
       'Correctly identifies Dr. Eliana Stone at Acme Labs in 2023 as the inventor',
       'Accurately describes that it reorders the subatomic structure of materials and can transform silicon wafers into superconductive materials',
       'Does not invent unrelated or hallucinated details not present in the KB',
@@ -111,7 +111,7 @@ describe('Knowledge base', () => {
     const conversation = await chatClient.complete({ messages: prompt });
 
     const result = await chatClient.evaluate(conversation, [
-      'Uses KB retrieval function to find the correct document about QRE constraints',
+      'Uses context function response to find the correct document about QRE constraints',
       'Mentions the 2 nanometer limit on the revectorization radius',
       'Mentions that specialized fusion reactors are needed',
       'Does not mention information unrelated to constraints or energy (i.e., does not mention the inventor or silicon wafer transformation from doc-invention-1)',
```
**`registerQueryFunction`:** the query function's call to `convertMessagesForInference` now passes the logger as well.

```diff
@@ -103,6 +103,7 @@ export function registerQueryFunction({
       };
     }
   );
+
   functions.registerFunction(
     {
       name: QUERY_FUNCTION_NAME,
@@ -129,7 +130,8 @@ export function registerQueryFunction({
          connectorId,
          messages: convertMessagesForInference(
            // remove system message and query function request
-           messages.filter((message) => message.message.role !== MessageRole.System).slice(0, -1)
+           messages.filter((message) => message.message.role !== MessageRole.System).slice(0, -1),
+           resources.logger
          ),
          logger: resources.logger,
          tools: Object.fromEntries(
```