[Rule Migration] Adding CIM to ECS mapping and ESQL validation (#202331)

## Summary

This PR adds the initial context to map CIM fields to ECS and two new
nodes validation and a node to handle esql validation issues, fixing
itself.

This is how the graph looks compared to its old one:
<img width="646" alt="image"
src="https://github.com/user-attachments/assets/253e449c-ac6f-4913-8da4-eb36f4e7b982">


Validation always runs last, and if validation returns any errors it
will run the appropriate node depending on what validation failed. Once
it is resolved it will validate again and then END when its successful.

Currently 5 error iterations is max, which is just an arbitrary number.
The default Langgraph configuration is 25 nodes executed in total for a
specific graph before it errors with a recursion limit (main and sub
graphs are not combined in that count).

A few things are included in this PR:

- Moved ESQL KB caller to util(any better place?), as it is now used in
multiple nodes.
- New Validation node, where any sort of validation takes place, usually
the last step before ending the graph (on success).
- New ESQL Error node, to resolve any ESQL validation errors and trigger
a re-validation.
- Fix a small bug in the main graph on the conditional edges, added a
map for the allowed return values.
This commit is contained in:
Marius Iversen 2024-12-03 11:51:19 +01:00 committed by GitHub
parent f0fbefa144
commit c1d976b470
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 461 additions and 24 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 23 KiB

After

Width:  |  Height:  |  Size: 41 KiB

Before After
Before After

View file

@ -48,5 +48,5 @@ const matchedPrebuiltRuleConditional = (state: MigrateRuleState) => {
if (state.elastic_rule?.prebuilt_rule_id) {
return END;
}
return 'translation';
return 'translationSubGraph';
};

View file

@ -6,11 +6,18 @@
*/
import { END, START, StateGraph } from '@langchain/langgraph';
import { isEmpty } from 'lodash/fp';
import { SiemMigrationRuleTranslationResult } from '../../../../../../../../common/siem_migrations/constants';
import { getFixQueryErrorsNode } from './nodes/fix_query_errors';
import { getProcessQueryNode } from './nodes/process_query';
import { getRetrieveIntegrationsNode } from './nodes/retrieve_integrations';
import { getTranslateRuleNode } from './nodes/translate_rule';
import { getValidationNode } from './nodes/validation';
import { translateRuleState } from './state';
import type { TranslateRuleGraphParams } from './types';
import type { TranslateRuleGraphParams, TranslateRuleState } from './types';
// How many times we will try to self-heal when validation fails, to prevent infinite graph recursions
const MAX_VALIDATION_ITERATIONS = 3;
export function getTranslateRuleGraph({
model,
@ -35,19 +42,37 @@ export function getTranslateRuleGraph({
model,
integrationRetriever,
});
const validationNode = getValidationNode({ logger });
const fixQueryErrorsNode = getFixQueryErrorsNode({ inferenceClient, connectorId, logger });
const translateRuleGraph = new StateGraph(translateRuleState)
// Nodes
.addNode('processQuery', processQueryNode)
.addNode('retrieveIntegrations', retrieveIntegrationsNode)
.addNode('translateRule', translateRuleNode)
.addNode('validation', validationNode)
.addNode('fixQueryErrors', fixQueryErrorsNode)
// Edges
.addEdge(START, 'processQuery')
.addEdge('processQuery', 'retrieveIntegrations')
.addEdge('retrieveIntegrations', 'translateRule')
.addEdge('translateRule', END);
.addEdge('translateRule', 'validation')
.addEdge('fixQueryErrors', 'validation')
.addConditionalEdges('validation', validationRouter);
const graph = translateRuleGraph.compile();
graph.name = 'Translate Rule Graph';
return graph;
}
const validationRouter = (state: TranslateRuleState) => {
if (
state.validation_errors.iterations <= MAX_VALIDATION_ITERATIONS &&
state.translation_result === SiemMigrationRuleTranslationResult.FULL
) {
if (!isEmpty(state.validation_errors?.esql_errors)) {
return 'fixQueryErrors';
}
}
return END;
};

View file

@ -0,0 +1,38 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import type { Logger } from '@kbn/core/server';
import type { InferenceClient } from '@kbn/inference-plugin/server';
import { getEsqlKnowledgeBase } from '../../../../../util/esql_knowledge_base_caller';
import type { GraphNode } from '../../types';
import { RESOLVE_ESQL_ERRORS_TEMPLATE } from './prompts';
interface GetFixQueryErrorsNodeParams {
inferenceClient: InferenceClient;
connectorId: string;
logger: Logger;
}
export const getFixQueryErrorsNode = ({
inferenceClient,
connectorId,
logger,
}: GetFixQueryErrorsNodeParams): GraphNode => {
const esqlKnowledgeBaseCaller = getEsqlKnowledgeBase({ inferenceClient, connectorId, logger });
return async (state) => {
const rule = state.elastic_rule;
const prompt = await RESOLVE_ESQL_ERRORS_TEMPLATE.format({
esql_errors: state.validation_errors.esql_errors,
esql_query: rule.query,
});
const response = await esqlKnowledgeBaseCaller(prompt);
const esqlQuery = response.match(/```esql\n([\s\S]*?)\n```/)?.[1] ?? '';
rule.query = esqlQuery;
return { elastic_rule: rule };
};
};

View file

@ -0,0 +1,7 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
export { getFixQueryErrorsNode } from './fix_query_errors';

View file

@ -0,0 +1,42 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { ChatPromptTemplate } from '@langchain/core/prompts';
export const RESOLVE_ESQL_ERRORS_TEMPLATE =
ChatPromptTemplate.fromTemplate(`You are a helpful cybersecurity (SIEM) expert agent. Your task is to resolve the errors in the Elasticsearch Query Language (ES|QL) query provided by the user.
Below is the relevant errors related to the ES|SQL query:
<context>
<esql_errors>
{esql_errors}
</esql_errors>
<esql_query>
{esql_query}
</esql_query>
</context>
<guidelines>
- You will be provided with the currentl ES|QL query and its related errors.
- Try to resolve the errors in the ES|QL query as best as you can to make it work.
- You must respond only with the modified query inside a \`\`\`esql code block, nothing else similar to the example response below.
</guidelines>
<example_response>
A: Please find the modified ES|QL query below:
\`\`\`esql
FROM logs-endpoint.events.process-*
| WHERE process.executable LIKE \"%chown root%\"
| STATS count = COUNT(*), firstTime = MIN(@timestamp), lastTime = MAX(@timestamp) BY process.executable,
process.command_line,
host.name
| EVAL firstTime = TO_DATETIME(firstTime), lastTime = TO_DATETIME(lastTime)
\`\`\`
</example_response>
`);

View file

@ -29,11 +29,15 @@ export const getProcessQueryNode = ({
const replaceQueryResourcePrompt =
REPLACE_QUERY_RESOURCE_PROMPT.pipe(model).pipe(replaceQueryParser);
const resourceContext = getResourcesContext(resources);
query = await replaceQueryResourcePrompt.invoke({
const response = await replaceQueryResourcePrompt.invoke({
query: state.original_rule.query,
macros: resourceContext.macros,
lookup_tables: resourceContext.lists,
});
const splQuery = response.match(/```spl\n([\s\S]*?)\n```/)?.[1] ?? '';
if (splQuery) {
query = splQuery;
}
}
return { inline_query: query };
};

View file

@ -140,8 +140,14 @@ Divide the query up into separate section and go through each section one at a t
<example_response>
A: Please find the modified SPL query below:
\`\`\`json
{{"match": "Linux User Account Creation"}}
\`\`\`spl
sourcetype="linux:audit" \`linux_auditd_normalized_proctitle_process\`
| rename host as dest
| where LIKE (process_exec, "%chown root%")
| stats count min(_time) as firstTime max(_time) as lastTime by process_exec proctitle normalized_proctitle_delimiter dest
| convert timeformat="%Y-%m-%dT%H:%M:%S" ctime(firstTime)
| convert timeformat="%Y-%m-%dT%H:%M:%S" ctime(lastTime)
| search *
\`\`\`
</example_response>

View file

@ -0,0 +1,181 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
export const SIEM_RULE_MIGRATION_CIM_ECS_MAP = `
datamodel,object,source_field,ecs_field,data_type
Application_State,All_Application_State,dest,service.node.name,string
Application_State,All_Application_State,process,process.title,string
Application_State,All_Application_State,user,user.name,string
Application_State,Ports,dest_port,destination.port,number
Application_State,Ports,transport,network.transport,string
Application_State,Ports,transport_dest_port,destination.port,string
Application_State,Services,service,service.name,string
Application_State,Services,service_id,service.id,string
Application_State,Services,status,service.state,string
Authentication,Authentication,action,event.action,string
Authentication,Authentication,app,process.name,string
Authentication,Authentication,dest,host.name,string
Authentication,Authentication,duration,event.duration,number
Authentication,Authentication,signature,event.code,string
Authentication,Authentication,signature_id,event.reason,string
Authentication,Authentication,src,source.address,string
Authentication,Authentication,src_nt_domain,source.domain,string
Authentication,Authentication,user,user.name,string
Certificates,All_Certificates,dest_port,destination.port,number
Certificates,All_Certificates,duration,event.duration,number
Certificates,All_Certificates,src,source.address,string
Certificates,All_Certificates,src_port,source.port,number
Certificates,All_Certificates,transport,network.protocol,string
Certificates,SSL,ssl_end_time,tls.server.not_after,time
Certificates,SSL,ssl_hash,tls.server.hash,string
Certificates,SSL,ssl_issuer_common_name,tls.server.issuer,string
Certificates,SSL,ssl_issuer_locality,x509.issuer.locality,string
Certificates,SSL,ssl_issuer_organization,x509.issuer.organization,string
Certificates,SSL,ssl_issuer_state,x509.issuer.state_or_province,string
Certificates,SSL,ssl_issuer_unit,x509.issuer.organizational_unit,string
Certificates,SSL,ssl_publickey_algorithm,x509.public_key_algorithm,string
Certificates,SSL,ssl_serial,x509.serial_number,string
Certificates,SSL,ssl_signature_algorithm,x509.signature_algorithm,string
Certificates,SSL,ssl_start_time,x509.not_before,time
Certificates,SSL,ssl_subject,x509.subject.distinguished_name,string
Certificates,SSL,ssl_subject_common_name,x509.subject.common_name,string
Certificates,SSL,ssl_subject_locality,x509.subject.locality,string
Certificates,SSL,ssl_subject_organization,x509.subject.organization,string
Certificates,SSL,ssl_subject_state,x509.subject.state_or_province,string
Certificates,SSL,ssl_subject_unit,x509.subject.organizational_unit,string
Certificates,SSL,ssl_version,tls.version,string
Change,All_Changes,action,event.action,string
Change,Account_Management,dest_nt_domain,destination.domain,string
Change,Account_Management,src_nt_domain,source.domain,string
Change,Account_Management,src_user,source.user,string
Intrusion_Detection,IDS_Attacks,action,event.action,string
Intrusion_Detection,IDS_Attacks,dest,destination.address,string
Intrusion_Detection,IDS_Attacks,dest_port,destination.port,number
Intrusion_Detection,IDS_Attacks,dvc,observer.hostname,string
Intrusion_Detection,IDS_Attacks,severity,event.severity,string
Intrusion_Detection,IDS_Attacks,src,source.ip,string
Intrusion_Detection,IDS_Attacks,user,source.user,string
JVM,OS,os,host.os.name,string
JVM,OS,os_architecture,host.architecture,string
JVM,OS,os_version,host.os.version,string
Malware,Malware_Attacks,action,event.action,string
Malware,Malware_Attacks,date,event.created,string
Malware,Malware_Attacks,dest,host.hostname,string
Malware,Malware_Attacks,file_hash,file.hash.*,string
Malware,Malware_Attacks,file_name,file.name,string
Malware,Malware_Attacks,file_path,file.path,string
Malware,Malware_Attacks,Sender,source.user.email,string
Malware,Malware_Attacks,src,source.ip,string
Malware,Malware_Attacks,user,related.user,string
Malware,Malware_Attacks,url,rule.reference,string
Network_Resolution,DNS,answer,dns.answers,string
Network_Resolution,DNS,dest,destination.address,string
Network_Resolution,DNS,dest_port,destination.port,number
Network_Resolution,DNS,duration,event.duration,number
Network_Resolution,DNS,message_type,dns.type,string
Network_Resolution,DNS,name,dns.question.name,string
Network_Resolution,DNS,query,dns.question.name,string
Network_Resolution,DNS,query_type,dns.op_code,string
Network_Resolution,DNS,record_type,dns.question.type,string
Network_Resolution,DNS,reply_code,dns.response_code,string
Network_Resolution,DNS,reply_code_id,dns.id,number
Network_Resolution,DNS,response_time,event.duration,number
Network_Resolution,DNS,src,source.address,string
Network_Resolution,DNS,src_port,source.port,number
Network_Resolution,DNS,transaction_id,dns.id,number
Network_Resolution,DNS,transport,network.transport,string
Network_Resolution,DNS,ttl,dns.answers.ttl,number
Network_Sessions,All_Sessions,action,event.action,string
Network_Sessions,All_Sessions,dest_ip,destination.ip,string
Network_Sessions,All_Sessions,dest_mac,destination.mac,string
Network_Sessions,All_Sessions,duration,event.duration,number
Network_Sessions,All_Sessions,src_dns,source.registered_domain,string
Network_Sessions,All_Sessions,src_ip,source.ip,string
Network_Sessions,All_Sessions,src_mac,source.mac,string
Network_Sessions,All_Sessions,user,user.name,string
Network_Traffic,All_Traffic,action,event.action,string
Network_Traffic,All_Traffic,app,network.protocol,string
Network_Traffic,All_Traffic,bytes,network.bytes,number
Network_Traffic,All_Traffic,dest,destination.ip,string
Network_Traffic,All_Traffic,dest_ip,destination.ip,string
Network_Traffic,All_Traffic,dest_mac,destination.mac,string
Network_Traffic,All_Traffic,dest_port,destination.port,number
Network_Traffic,All_Traffic,dest_translated_ip,destination.nat.ip,string
Network_Traffic,All_Traffic,dest_translated_port,destination.nat.port,number
Network_Traffic,All_Traffic,direction,network.direction,string
Network_Traffic,All_Traffic,duration,event.duration,number
Network_Traffic,All_Traffic,dvc,observer.name,string
Network_Traffic,All_Traffic,dvc_ip,observer.ip,string
Network_Traffic,All_Traffic,dvc_mac,observer.mac,string
Network_Traffic,All_Traffic,dvc_zone,observer.egress.zone,string
Network_Traffic,All_Traffic,packets,network.packets,number
Network_Traffic,All_Traffic,packets_in,source.packets,number
Network_Traffic,All_Traffic,packets_out,destination.packets,number
Network_Traffic,All_Traffic,protocol,network.protocol,string
Network_Traffic,All_Traffic,rule,rule.name,string
Network_Traffic,All_Traffic,src,source.address,string
Network_Traffic,All_Traffic,src_ip,source.ip,string
Network_Traffic,All_Traffic,src_mac,source.mac,string
Network_Traffic,All_Traffic,src_port,source.port,number
Network_Traffic,All_Traffic,src_translated_ip,source.nat.ip,string
Network_Traffic,All_Traffic,src_translated_port,source.nat.port,number
Network_Traffic,All_Traffic,transport,network.transport,string
Network_Traffic,All_Traffic,vlan,vlan.name,string
Vulnerabilities,Vulnerabilities,category,vulnerability.category,string
Vulnerabilities,Vulnerabilities,cve,vulnerability.id,string
Vulnerabilities,Vulnerabilities,cvss,vulnerability.score.base,number
Vulnerabilities,Vulnerabilities,dest,host.name,string
Vulnerabilities,Vulnerabilities,dvc,vulnerability.scanner.vendor,string
Vulnerabilities,Vulnerabilities,severity,vulnerability.severity,string
Vulnerabilities,Vulnerabilities,url,vulnerability.reference,string
Vulnerabilities,Vulnerabilities,user,related.user,string
Vulnerabilities,Vulnerabilities,vendor_product,vulnerability.scanner.vendor,string
Endpoint,Ports,creation_time,@timestamp,timestamp
Endpoint,Ports,dest_port,destination.port,number
Endpoint,Ports,process_id,process.pid,string
Endpoint,Ports,transport,network.transport,string
Endpoint,Ports,transport_dest_port,destination.port,string
Endpoint,Processes,action,event.action,string
Endpoint,Processes,os,os.full,string
Endpoint,Processes,parent_process_exec,process.parent.name,string
Endpoint,Processes,parent_process_id,process.ppid,number
Endpoint,Processes,parent_process_guid,process.parent.entity_id,string
Endpoint,Processes,parent_process_path,process.parent.executable,string
Endpoint,Processes,process_current_directory,process.parent.working_directory,
Endpoint,Processes,process_exec,process.name,string
Endpoint,Processes,process_hash,process.hash.*,string
Endpoint,Processes,process_guid,process.entity_id,string
Endpoint,Processes,process_id,process.pid,number
Endpoint,Processes,process_path,process.executable,string
Endpoint,Processes,user_id,related.user,string
Endpoint,Services,description,service.name,string
Endpoint,Services,process_id,service.id,string
Endpoint,Services,service_dll,dll.name,string
Endpoint,Services,service_dll_path,dll.path,string
Endpoint,Services,service_dll_hash,dll.hash.*,string
Endpoint,Services,service_dll_signature_exists,dll.code_signature.exists,boolean
Endpoint,Services,service_dll_signature_verified,dll.code_signature.valid,boolean
Endpoint,Services,service_exec,service.name,string
Endpoint,Services,service_hash,hash.*,string
Endpoint,Filesystem,file_access_time,file.accessed,timestamp
Endpoint,Filesystem,file_create_time,file.created,timestamp
Endpoint,Filesystem,file_modify_time,file.mtime,timestamp
Endpoint,Filesystem,process_id,process.pid,string
Endpoint,Registry,process_id,process.id,string
Web,Web,action,event.action,string
Web,Web,app,observer.product,string
Web,Web,bytes_in,http.request.bytes,number
Web,Web,bytes_out,http.response.bytes,number
Web,Web,dest,destination.ip,string
Web,Web,duration,event.duration,number
Web,Web,http_method,http.request.method,string
Web,Web,http_referrer,http.request.referrer,string
Web,Web,http_user_agent,user_agent.name,string
Web,Web,status,http.response.status_code,string
Web,Web,url,url.full,string
Web,Web,user,url.username,string
Web,Web,vendor_product,observer.product,string`;

View file

@ -5,14 +5,18 @@
* 2.0.
*/
import type { TranslateRuleState } from '../../types';
import { ChatPromptTemplate } from '@langchain/core/prompts';
export const getEsqlTranslationPrompt = (
state: TranslateRuleState,
indexPatterns: string
): string => {
return `You are a helpful cybersecurity (SIEM) expert agent. Your task is to migrate "detection rules" from Splunk to Elastic Security.
export const ESQL_TRANSLATION_PROMPT =
ChatPromptTemplate.fromTemplate(`You are a helpful cybersecurity (SIEM) expert agent. Your task is to migrate "detection rules" from Splunk to Elastic Security.
Your goal is to translate the SPL query into an equivalent Elastic Security Query Language (ES|QL) query.
Below is the relevant context used when deciding which Elastic Common Schema field to use when translating from Splunk CIM fields:
<context>
<cim_to_ecs_map>
{field_mapping}
</cim_to_ecs_map>
</context>
## Splunk rule Information provided:
- Below you will find Splunk rule information: the title (<<TITLE>>), the description (<<DESCRIPTION>>), and the SPL (Search Processing Language) query (<<SPL_QUERY>>).
@ -22,7 +26,7 @@ Your goal is to translate the SPL query into an equivalent Elastic Security Quer
## Guidelines:
- Analyze the SPL query and identify the key components.
- Translate the SPL query into an equivalent ES|QL query using ECS (Elastic Common Schema) field names.
- Always start the generated ES|QL query by filtering FROM using these index patterns in the translated query: ${indexPatterns}.
- Always start the generated ES|QL query by filtering FROM using these index patterns in the translated query: {indexPatterns}.
- If, in the SPL query, you find a lookup list or macro call, mention it in the summary and add a placeholder in the query with the format [macro:<macro_name>(argumentCount)] or [lookup:<lookup_name>] including the [] keys,
- Examples:
- \`get_duration(firstDate,secondDate)\` -> [macro:get_duration(2)]
@ -35,15 +39,14 @@ Your goal is to translate the SPL query into an equivalent Elastic Security Quer
Find the Splunk rule information below:
<<TITLE>>
${state.original_rule.title}
{title}
<</TITLE>>
<<DESCRIPTION>>
${state.original_rule.description}
{description}
<</DESCRIPTION>>
<<SPL_QUERY>>
${state.inline_query}
{inline_query}
<</SPL_QUERY>>
`;
};
`);

View file

@ -9,10 +9,11 @@ import type { Logger } from '@kbn/core/server';
import type { InferenceClient } from '@kbn/inference-plugin/server';
import { SiemMigrationRuleTranslationResult } from '../../../../../../../../../../common/siem_migrations/constants';
import type { ChatModel } from '../../../../../util/actions_client_chat';
import { getEsqlKnowledgeBase } from '../../../../../util/esql_knowledge_base_caller';
import type { RuleResourceRetriever } from '../../../../../util/rule_resource_retriever';
import type { GraphNode } from '../../types';
import { getEsqlKnowledgeBase } from './esql_knowledge_base_caller';
import { getEsqlTranslationPrompt } from './prompts';
import { SIEM_RULE_MIGRATION_CIM_ECS_MAP } from './cim_ecs_map';
import { ESQL_TRANSLATION_PROMPT } from './prompts';
interface GetTranslateRuleNodeParams {
model: ChatModel;
@ -29,12 +30,20 @@ export const getTranslateRuleNode = ({
}: GetTranslateRuleNodeParams): GraphNode => {
const esqlKnowledgeBaseCaller = getEsqlKnowledgeBase({ inferenceClient, connectorId, logger });
return async (state) => {
const indexPatterns = state.integrations.flatMap((integration) =>
integration.data_streams.map((dataStream) => dataStream.index_pattern)
);
const indexPatterns = state.integrations
.flatMap((integration) =>
integration.data_streams.map((dataStream) => dataStream.index_pattern)
)
.join(',');
const integrationIds = state.integrations.map((integration) => integration.id);
const prompt = getEsqlTranslationPrompt(state, indexPatterns.join(','));
const prompt = await ESQL_TRANSLATION_PROMPT.format({
title: state.original_rule.title,
description: state.original_rule.description,
field_mapping: SIEM_RULE_MIGRATION_CIM_ECS_MAP,
inline_query: state.inline_query,
indexPatterns,
});
const response = await esqlKnowledgeBaseCaller(prompt);
const esqlQuery = response.match(/```esql\n([\s\S]*?)\n```/)?.[1] ?? '';

View file

@ -0,0 +1,62 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import type { ESQLAstQueryExpression, ESQLCommandOption, EditorError } from '@kbn/esql-ast';
import { parse } from '@kbn/esql-ast';
import { isColumnItem, isOptionItem } from '@kbn/esql-validation-autocomplete';
import { isAggregatingQuery } from '@kbn/securitysolution-utils';
interface ParseEsqlQueryResult {
errors: EditorError[];
isEsqlQueryAggregating: boolean;
hasMetadataOperator: boolean;
}
function computeHasMetadataOperator(astExpression: ESQLAstQueryExpression): boolean {
// Check whether the `from` command has `metadata` operator
const metadataOption = getMetadataOption(astExpression);
if (!metadataOption) {
return false;
}
// Check whether the `metadata` operator has `_id` argument
const idColumnItem = metadataOption.args.find(
(fromArg) => isColumnItem(fromArg) && fromArg.name === '_id'
);
if (!idColumnItem) {
return false;
}
return true;
}
function getMetadataOption(astExpression: ESQLAstQueryExpression): ESQLCommandOption | undefined {
const fromCommand = astExpression.commands.find((x) => x.name === 'from');
if (!fromCommand?.args) {
return undefined;
}
// Check whether the `from` command has `metadata` operator
for (const fromArg of fromCommand.args) {
if (isOptionItem(fromArg) && fromArg.name === 'metadata') {
return fromArg;
}
}
return undefined;
}
export const parseEsqlQuery = (query: string): ParseEsqlQueryResult => {
const { root, errors } = parse(query);
const isEsqlQueryAggregating = isAggregatingQuery(root);
return {
errors,
isEsqlQueryAggregating,
hasMetadataOperator: computeHasMetadataOperator(root),
};
};

View file

@ -0,0 +1,7 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
export { getValidationNode } from './validation';

View file

@ -0,0 +1,43 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import type { Logger } from '@kbn/core/server';
import { isEmpty } from 'lodash/fp';
import type { GraphNode } from '../../types';
import { parseEsqlQuery } from './esql_query';
interface GetValidationNodeParams {
logger: Logger;
}
/**
* This node runs all validation steps, and will redirect to the END of the graph if no errors are found.
* Any new validation steps should be added here.
*/
export const getValidationNode = ({ logger }: GetValidationNodeParams): GraphNode => {
return async (state) => {
const query = state.elastic_rule.query;
// We want to prevent infinite loops, so we increment the iterations counter for each validation run.
const currentIteration = ++state.validation_errors.iterations;
let esqlErrors: string = '';
if (!isEmpty(query)) {
const { errors, isEsqlQueryAggregating, hasMetadataOperator } = parseEsqlQuery(query);
if (!isEmpty(errors)) {
esqlErrors = JSON.stringify(errors);
} else if (!isEsqlQueryAggregating && !hasMetadataOperator) {
esqlErrors =
'Queries that dont use the STATS...BY function (non-aggregating queries) must include the "metadata _id, _version, _index" operator after the source command. For example: FROM logs* metadata _id, _version, _index.';
}
}
if (esqlErrors) {
logger.debug(`ESQL query validation failed: ${esqlErrors}`);
}
return { validation_errors: { iterations: currentIteration, esql_errors: esqlErrors } };
};
};

View file

@ -14,6 +14,7 @@ import type {
RuleMigration,
} from '../../../../../../../../common/siem_migrations/model/rule_migration.gen';
import type { Integration } from '../../../../types';
import type { TranslateRuleValidationErrors } from './types';
export const translateRuleState = Annotation.Root({
messages: Annotation<BaseMessage[]>({
@ -33,6 +34,10 @@ export const translateRuleState = Annotation.Root({
reducer: (state, action) => ({ ...state, ...action }),
default: () => ({} as ElasticRule),
}),
validation_errors: Annotation<TranslateRuleValidationErrors>({
reducer: (current, value) => value ?? current,
default: () => ({ iterations: 0 } as TranslateRuleValidationErrors),
}),
translation_result: Annotation<SiemMigrationRuleTranslationResult>({
reducer: (current, value) => value ?? current,
default: () => SiemMigrationRuleTranslationResult.UNTRANSLATABLE,

View file

@ -23,3 +23,8 @@ export interface TranslateRuleGraphParams {
integrationRetriever: IntegrationRetriever;
logger: Logger;
}
export interface TranslateRuleValidationErrors {
iterations: number;
esql_errors?: string;
}