[Security GenAI][Integration Assistant] Fix bugs in ecs_mapping graph (#186301)

## Summary

This PR contains multiple fixes for the `ecs_mapping` graph.

- Closes #185038 
- Closes #185037 

### Checklist

Delete any items that are not applicable to this PR.

- [x] [Unit or functional
tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)
were updated or added to match the most common scenarios



### For maintainers

- [ ] This was checked for breaking API changes and was [labeled
appropriately](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process)

---------

Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com>
This commit is contained in:
Bharat Pasupula 2024-06-18 10:55:41 +02:00 committed by GitHub
parent 202a774d46
commit 4f3e12e150
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 63 additions and 6 deletions

View file

@ -30,11 +30,12 @@ Go through each value step by step and modify it with the following process:
3. If no relevant ECS field is found, the value should just be replaced with "null" rather than a new object.
4. Only if a relevant ECS field is found replace the value with a new object that has the keys "target", "confidence", "date_format" and "type".
5. The object key "target" should be set to be the full path of the ECS field name you think it matches. Set the object key "type" to be either "string", "boolean", "number" or "date" depending on what was detected as the example value.
6. If the type "date" is used, then set date_format to be an array of one or more of the equivilant JAVA date formats that fits the example value. If the type is not date then date_format should be set to an empty array [].
7. For each key that you set a target ECS field, also score the confidence you have in that the target field is correct, use a float between 0.0 and 1.0 and set the value in the nested "confidence" key.
8. When you want to use an ECS field as a value for a target, but another field already has the same ECS field as its target, try to find another fitting ECS field. If none is found then the one you are least confident about should have the object replaced with null.
9. If you are not confident for a specific field, you should always set the value to null.
10. These {package_name} log samples are based on source and destination type data, prioritize these compared to other related ECS fields like host.* and observer.*.
6. If the type "date" is used, then set date_format to be an array of one or more of the equivilant JAVA date formats that fits the example value, including those with nanosecond precision. If the type is not date then date_format should be set to an empty array [].
7. Use a custom date pattern if the built-in date format patterns do not match the example value , including those with nanosecond precision.
8. For each key that you set a target ECS field, also score the confidence you have in that the target field is correct, use a float between 0.0 and 1.0 and set the value in the nested "confidence" key.
9. When you want to use an ECS field as a value for a target, but another field already has the same ECS field as its target, try to find another fitting ECS field. If none is found then the one you are least confident about should have the object replaced with null.
10. If you are not confident for a specific field, you should always set the value to null.
11. These {package_name} log samples are based on source and destination type data, prioritize these compared to other related ECS fields like host.* and observer.*.
You ALWAYS follow these guidelines when writing your response:
<guidelines>

View file

@ -0,0 +1,52 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { processMapping } from './validate';
describe('Testing ecs handler', () => {
  /**
   * Feeds processMapping() a nested ECS mapping and checks that `output`
   * ends up keyed by target ECS field, each holding the list of source
   * field paths that map to it.
   *
   * processMapping() is a synchronous function returning `void` that
   * reports its result by mutating `output`, so the test is synchronous
   * and does not `await` the call.
   */
  it('processMapping()', () => {
    const path: string[] = [];
    const value = {
      checkpoint: {
        firewall: {
          // Unmapped source fields are represented as null.
          product: null,
          sequencenum: null,
          subject: null,
          ifdir: null,
          origin: {
            target: 'source.address',
            confidence: 0.9,
            type: 'string',
            date_formats: [],
          },
          flags: null,
          sendtotrackerasadvancedauditlog: null,
          originsicname: null,
          version: null,
          administrator: {
            target: 'user.name',
            confidence: 0.8,
            type: 'string',
            date_formats: [],
          },
          foo: {
            target: null, // Invalid target: this entry is expected to be skipped.
            confidence: 0.8,
            type: 'string',
            date_formats: [],
          },
        },
      },
    };
    const output: Record<string, string[][]> = {};

    processMapping(path, value, output);

    // toEqual also guarantees that no entry was produced for `foo` or for the null fields.
    expect(output).toEqual({
      'source.address': [['checkpoint', 'firewall', 'origin']],
      'user.name': [['checkpoint', 'firewall', 'administrator']],
    });
  });
});

View file

@ -49,7 +49,11 @@ function findMissingFields(formattedSamples: string, ecsMapping: AnyObject): str
return missingKeys;
}
function processMapping(path: string[], value: any, output: Record<string, string[][]>): void {
export function processMapping(
path: string[],
value: any,
output: Record<string, string[][]>
): void {
if (typeof value === 'object' && value !== null) {
if (!Array.isArray(value)) {
// If the value is a dict with all the keys returned for each source field, this is the full path of the field.