[8.x] [Obs AI Assistant] Add API test for execute_query tool (#213517) (#214021)

# Backport

This will backport the following commits from `main` to `8.x`:
- [[Obs AI Assistant] Add API test for `execute_query` tool
(#213517)](https://github.com/elastic/kibana/pull/213517)

<!--- Backport version: 9.6.6 -->

### Questions?
Please refer to the [Backport tool
documentation](https://github.com/sorenlouv/backport)

<!--BACKPORT [{"author":{"name":"Søren
Louv-Jansen","email":"soren.louv@elastic.co"},"sourceCommit":{"committedDate":"2025-03-11T21:45:28Z","message":"[Obs
AI Assistant] Add API test for `execute_query` tool (#213517)\n\n- Adds
test for `execute_query` function\n- Add `drop_null_columns` to esql
query in order to avoid column\nexplosion\n\nDepends on:
https://github.com/elastic/kibana/pull/213231\n\n---------\n\nCo-authored-by:
Viduni Wickramarachchi
<viduni.ushanka@gmail.com>","sha":"3ed6e4583e254dec62e79bfda95f369470e66de4","branchLabelMapping":{"^v9.1.0$":"main","^v8.19.0$":"8.x","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["release_note:skip","v9.0.0","Team:Obs
AI
Assistant","ci:project-deploy-observability","backport:version","v9.1.0","v8.19.0"],"title":"[Obs
AI Assistant] Add API test for `execute_query`
tool","number":213517,"url":"https://github.com/elastic/kibana/pull/213517","mergeCommit":{"message":"[Obs
AI Assistant] Add API test for `execute_query` tool (#213517)\n\n- Adds
test for `execute_query` function\n- Add `drop_null_columns` to esql
query in order to avoid column\nexplosion\n\nDepends on:
https://github.com/elastic/kibana/pull/213231\n\n---------\n\nCo-authored-by:
Viduni Wickramarachchi
<viduni.ushanka@gmail.com>","sha":"3ed6e4583e254dec62e79bfda95f369470e66de4"}},"sourceBranch":"main","suggestedTargetBranches":["9.0","8.x"],"targetPullRequestStates":[{"branch":"9.0","label":"v9.0.0","branchLabelMappingKey":"^v(\\d+).(\\d+).\\d+$","isSourceBranch":false,"state":"NOT_CREATED"},{"branch":"main","label":"v9.1.0","branchLabelMappingKey":"^v9.1.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/213517","number":213517,"mergeCommit":{"message":"[Obs
AI Assistant] Add API test for `execute_query` tool (#213517)\n\n- Adds
test for `execute_query` function\n- Add `drop_null_columns` to esql
query in order to avoid column\nexplosion\n\nDepends on:
https://github.com/elastic/kibana/pull/213231\n\n---------\n\nCo-authored-by:
Viduni Wickramarachchi
<viduni.ushanka@gmail.com>","sha":"3ed6e4583e254dec62e79bfda95f369470e66de4"}},{"branch":"8.x","label":"v8.19.0","branchLabelMappingKey":"^v8.19.0$","isSourceBranch":false,"state":"NOT_CREATED"}]}]
BACKPORT-->

Co-authored-by: Søren Louv-Jansen <soren.louv@elastic.co>
This commit is contained in:
Kibana Machine 2025-03-12 10:39:00 +11:00 committed by GitHub
parent dbe31ef45a
commit ce08437e98
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 237 additions and 35 deletions

View file

@ -28,6 +28,7 @@ export function registerQueryFunction({
functions,
resources,
pluginsStart,
signal,
}: FunctionRegistrationParameters) {
functions.registerInstruction(({ availableFunctionNames }) => {
if (!availableFunctionNames.includes(QUERY_FUNCTION_NAME)) {
@ -83,6 +84,7 @@ export function registerQueryFunction({
const { error, errorMessages, rows, columns } = await runAndValidateEsqlQuery({
query: correctedQuery,
client,
signal,
});
if (!!error) {
@ -113,7 +115,7 @@ export function registerQueryFunction({
function takes no input.`,
visibility: FunctionVisibility.AssistantOnly,
},
async ({ messages, connectorId, simulateFunctionCalling }, signal) => {
async ({ messages, connectorId, simulateFunctionCalling }) => {
const esqlFunctions = functions
.getFunctions()
.filter(

View file

@ -16,9 +16,11 @@ import { splitIntoCommands } from '@kbn/inference-plugin/common';
export async function runAndValidateEsqlQuery({
query,
client,
signal,
}: {
query: string;
client: ElasticsearchClient;
signal: AbortSignal;
}): Promise<{
columns?: DatatableColumn[];
rows?: ESQLRow[];
@ -47,30 +49,22 @@ export async function runAndValidateEsqlQuery({
return 'text' in error ? error.text : error.message;
});
return client.transport
.request({
method: 'POST',
path: '_query',
body: {
query,
},
})
.then((res) => {
const esqlResponse = res as ESQLSearchResponse;
try {
const res = await client.esql.query({ query, drop_null_columns: true }, { signal });
const esqlResponse = res as unknown as ESQLSearchResponse;
const columns =
esqlResponse.columns?.map(({ name, type }) => ({
id: name,
name,
meta: { type: esFieldTypeToKibanaFieldType(type) as DatatableColumnType },
})) ?? [];
const columns =
esqlResponse.columns?.map(({ name, type }) => ({
id: name,
name,
meta: { type: esFieldTypeToKibanaFieldType(type) as DatatableColumnType },
})) ?? [];
return { columns, rows: esqlResponse.values };
})
.catch((error) => {
return {
error,
...(errorMessages.length ? { errorMessages } : {}),
};
});
return { columns, rows: esqlResponse.values };
} catch (error) {
return {
error,
...(errorMessages.length ? { errorMessages } : {}),
};
}
}

View file

@ -30,6 +30,7 @@ const getMessageForLLM = (
export function registerVisualizeESQLFunction({
functions,
resources,
signal,
}: FunctionRegistrationParameters) {
functions.registerFunction(
visualizeESQLFunction,
@ -43,6 +44,7 @@ export function registerVisualizeESQLFunction({
const { columns, errorMessages, rows, error } = await runAndValidateEsqlQuery({
query: correctedQuery,
client: (await resources.context.core).elasticsearch.client.asCurrentUser,
signal,
});
const message = getMessageForLLM(intention, query, Boolean(errorMessages?.length));

View file

@ -0,0 +1,210 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { MessageAddEvent } from '@kbn/observability-ai-assistant-plugin/common';
import expect from '@kbn/expect';
import { LogsSynthtraceEsClient } from '@kbn/apm-synthtrace';
import { last } from 'lodash';
import { ChatCompletionStreamParams } from 'openai/lib/ChatCompletionStream';
import { EsqlResponse } from '@elastic/elasticsearch/lib/helpers';
import {
LlmProxy,
createLlmProxy,
} from '../../../../../../../observability_ai_assistant_api_integration/common/create_llm_proxy';
import { chatComplete } from './helpers';
import type { DeploymentAgnosticFtrProviderContext } from '../../../../../ftr_provider_context';
import { createSimpleSyntheticLogs } from '../../synthtrace_scenarios/simple_logs';
/**
 * API integration suite for the `execute_query` tool.
 *
 * The suite creates an LLM proxy via `createLlmProxy`, creates an action connector that
 * targets the proxy's port, drives one chat/complete call, and then asserts on the request
 * bodies the proxy intercepted and on the `messageAdded` events returned by `chatComplete`.
 *
 * @param getService - FTR service locator; used here for `log`, `observabilityAIAssistantApi`
 *   and `synthtrace`.
 */
export default function ApiTest({ getService }: DeploymentAgnosticFtrProviderContext) {
const log = getService('log');
const observabilityAIAssistantAPIClient = getService('observabilityAIAssistantApi');
const synthtrace = getService('synthtrace');
describe('execute_query', function () {
// NOTE(review): presumably this tag excludes the suite from MKI runs — confirm against the FTR config.
this.tags(['failsOnMKI']);
let llmProxy: LlmProxy;
let connectorId: string;
// Create the proxy first: the connector needs the proxy's port.
before(async () => {
llmProxy = await createLlmProxy(log);
connectorId = await observabilityAIAssistantAPIClient.createProxyActionConnector({
port: llmProxy.getPort(),
});
});
// Tear down what the `before` hook above created: close the proxy and delete the connector.
after(async () => {
llmProxy.close();
await observabilityAIAssistantAPIClient.deleteActionConnector({
actionId: connectorId,
});
});
// Calling `execute_query` via the chat/complete endpoint
describe('POST /internal/observability_ai_assistant/chat/complete', function () {
// All of these are assigned once in the `before` hook below and only read by the `it` blocks.
let messageAddedEvents: MessageAddEvent[];
let logsSynthtraceEsClient: LogsSynthtraceEsClient;
let firstRequestBody: ChatCompletionStreamParams;
let secondRequestBody: ChatCompletionStreamParams;
let thirdRequestBody: ChatCompletionStreamParams;
let fourthRequestBody: ChatCompletionStreamParams;
before(async () => {
// Synthetic logs for the `apache.access` dataset; the scripted query below reads
// from `logs-apache.access-default`.
logsSynthtraceEsClient = synthtrace.createLogsSynthtraceEsClient();
await createSimpleSyntheticLogs({
logsSynthtraceEsClient,
dataset: 'apache.access',
});
// Four interceptors are registered before the chat call; their return values are
// deliberately discarded with `void`. The assertions further down read the intercepted
// requests by index (0..3), so they assume the requests arrive in this registration
// order: `query`, `structuredOutput`, `execute_query`, final conversation reply.
void llmProxy.interceptWithFunctionRequest({
name: 'query',
arguments: () => JSON.stringify({}),
when: () => true,
});
// Unlike the other interceptors, this one is conditional: it only matches a request
// whose `tool_choice` names the `structuredOutput` function.
void llmProxy.interceptWithFunctionRequest({
name: 'structuredOutput',
arguments: () => JSON.stringify({}),
// @ts-expect-error
when: (requestBody) => requestBody.tool_choice?.function?.name === 'structuredOutput',
});
// NOTE(review): this query KEEPs only `message`, yet the column assertion below also
// expects `@timestamp` — presumably the query is corrected before it is executed; confirm.
void llmProxy.interceptWithFunctionRequest({
name: 'execute_query',
arguments: () =>
JSON.stringify({
query: `FROM logs-apache.access-default
| KEEP message
| SORT @timestamp DESC
| LIMIT 10`,
}),
when: () => true,
});
void llmProxy.interceptConversation({ content: 'Hello from user' });
// Parenthesised destructuring assignment into the already-declared `messageAddedEvents`.
({ messageAddedEvents } = await chatComplete({
userPrompt: 'Please retrieve the most recent Apache log messages',
connectorId,
observabilityAIAssistantAPIClient,
}));
// Wait until every interceptor has been hit before capturing the request bodies by index.
await llmProxy.waitForAllInterceptorsToHaveBeenCalled();
firstRequestBody = llmProxy.interceptedRequests[0].requestBody;
secondRequestBody = llmProxy.interceptedRequests[1].requestBody;
thirdRequestBody = llmProxy.interceptedRequests[2].requestBody;
fourthRequestBody = llmProxy.interceptedRequests[3].requestBody;
});
// Clean up via the synthtrace client that generated the logs in the `before` hook.
after(async () => {
await logsSynthtraceEsClient.clean();
});
// One intercepted request per registered interceptor.
it('makes 4 requests to the LLM', () => {
expect(llmProxy.interceptedRequests.length).to.be(4);
});
it('emits 7 messageAdded events', () => {
expect(messageAddedEvents.length).to.be(7);
});
describe('First LLM request - Initial tool selection', () => {
// `to.eql` on an array is order-sensitive, so this pins both the tool set and its order.
it('exposes the right tools', () => {
expect(firstRequestBody.tools?.map((t) => t.function.name)).to.eql([
'query',
'get_alerts_dataset_info',
'alerts',
'changes',
'elasticsearch',
'kibana',
'get_dataset_info',
'execute_connector',
]);
});
});
describe('The second request - Structured output validation', () => {
it('contains the correct number of messages', () => {
expect(secondRequestBody.messages.length).to.be(6);
});
it('contains the `structuredOutput` tool choice', () => {
// @ts-expect-error
const hasToolChoice = secondRequestBody.tool_choice.function?.name === 'structuredOutput';
expect(hasToolChoice).to.be(true);
});
it('contains user message with information about how to request ESQL documentation', () => {
expect(last(secondRequestBody.messages)?.content).to.contain(
'Based on the previous conversation, request documentation'
);
});
});
describe('The third request - Requesting ESQL documentation', () => {
// Only the first tool call of each message is inspected.
it('contains the `request_documentation` tool call request', () => {
const hasToolCall = thirdRequestBody.messages.some(
// @ts-expect-error
(message) => message.tool_calls?.[0]?.function?.name === 'request_documentation'
);
expect(hasToolCall).to.be(true);
});
// The last message's content is parsed as JSON and must carry `documentation.OPERATORS`.
it('contains ESQL documentation', () => {
const parsed = JSON.parse(last(thirdRequestBody.messages)?.content as string);
expect(parsed.documentation.OPERATORS).to.contain('Binary Operators');
});
it('allows the LLM to call the tools execute_query, visualize_query and request_documentation', () => {
expect(thirdRequestBody.tools?.map((t) => t.function.name)).to.eql([
'execute_query',
'visualize_query',
'request_documentation',
]);
});
});
describe('The fourth request - Executing the ES|QL query', () => {
it('contains the `execute_query` tool call request', () => {
const hasToolCall = fourthRequestBody.messages.some(
// @ts-expect-error
(message) => message.tool_calls?.[0]?.function?.name === 'execute_query'
);
expect(hasToolCall).to.be(true);
});
it('emits a messageAdded event with the `execute_query` tool response', () => {
const event = messageAddedEvents.find(
({ message }) => message.message.name === 'execute_query'
);
expect(event?.message.message.content).to.contain('simple log message');
});
describe('the `execute_query` tool call response', () => {
// Parsed from the last message of the fourth request; the annotation is not validated
// at runtime, so it only describes the shape these tests expect.
let toolCallResponse: { columns: EsqlResponse['columns']; rows: EsqlResponse['values'] };
before(async () => {
toolCallResponse = JSON.parse(last(fourthRequestBody.messages)?.content as string);
});
it('has the correct columns', () => {
expect(toolCallResponse.columns.map(({ name }) => name)).to.eql([
'message',
'@timestamp',
]);
});
// Matches the `LIMIT 10` in the scripted query.
it('has the correct number of rows', () => {
expect(toolCallResponse.rows.length).to.be(10);
});
// First cell of the first row, i.e. the `message` column per the column assertion above.
it('has the right log message', () => {
expect(toolCallResponse.rows[0][0]).to.be('simple log message');
});
});
});
});
});
}

View file

@ -18,6 +18,7 @@ export default function aiAssistantApiIntegrationTests({
loadTestFile(require.resolve('./complete/functions/alerts.spec.ts'));
loadTestFile(require.resolve('./complete/functions/get_alerts_dataset_info.spec.ts'));
loadTestFile(require.resolve('./complete/functions/get_dataset_info.spec.ts'));
loadTestFile(require.resolve('./complete/functions/execute_query.spec.ts'));
loadTestFile(require.resolve('./complete/functions/elasticsearch.spec.ts'));
loadTestFile(require.resolve('./complete/functions/summarize.spec.ts'));
loadTestFile(require.resolve('./public_complete/public_complete.spec.ts'));

View file

@ -11,6 +11,7 @@ import { ChatFeedback } from '@kbn/observability-ai-assistant-plugin/public/anal
import { pick } from 'lodash';
import { parse as parseCookie } from 'tough-cookie';
import { kbnTestConfig } from '@kbn/test';
import { systemMessageSorted } from '../../../api_integration/deployment_agnostic/apis/observability/ai_assistant/complete/functions/helpers';
import {
createLlmProxy,
LlmProxy,
@ -254,8 +255,8 @@ export default function ApiTest({ getService, getPageObjects }: FtrProviderConte
'You are a helpful assistant for Elastic Observability. Your goal is '
);
expect(sortSystemMessage(systemMessage!)).to.eql(
sortSystemMessage(primarySystemMessage)
expect(systemMessageSorted(systemMessage!)).to.eql(
systemMessageSorted(primarySystemMessage)
);
expect(firstUserMessage.content).to.eql('hello');
@ -434,11 +435,3 @@ export default function ApiTest({ getService, getPageObjects }: FtrProviderConte
});
});
}
/**
 * Breaks a system message into its blank-line-separated sections, trims each
 * one, and returns the sections sorted.
 *
 * The order of instructions can vary, so callers compare the sorted sections
 * instead of the raw strings.
 *
 * @param message - Full system message text.
 * @returns The trimmed sections in default (code-unit) sort order.
 */
function sortSystemMessage(message: string) {
  const sections: string[] = [];
  for (const section of message.split('\n\n')) {
    sections.push(section.trim());
  }
  // `sections` is a fresh array, so sorting it in place cannot affect the caller's data.
  sections.sort();
  return sections;
}